From 064464fc38269e70f7e3a34cb25fc9085ab85782 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 17 Sep 2021 12:20:51 +0100 Subject: [PATCH] bpo-45219: Factor dictkey indexing (GH-28389) --- Include/cpython/dictobject.h | 4 +- Objects/dictobject.c | 126 +++++++++++++++++++++++------------ Python/specialize.c | 39 ++++++----- 3 files changed, 106 insertions(+), 63 deletions(-) diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index de3c1160b48..7c63374c566 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -85,4 +85,6 @@ PyAPI_FUNC(PyObject *) _PyDictView_Intersect(PyObject* self, PyObject *other); /* Gets a version number unique to the current state of the keys of dict, if possible. * Returns the version number, or zero if it was not possible to get a version number. */ -uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictObject *dict); +uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictKeysObject *dictkeys); + +Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c78cbafe583..85122eca7b7 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -734,6 +734,73 @@ lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index) Py_UNREACHABLE(); } +static Py_ssize_t +dictkeys_stringlookup(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) +{ + PyDictKeyEntry *ep0 = DK_ENTRIES(dk); + size_t mask = DK_MASK(dk); + size_t perturb = hash; + size_t i = (size_t)hash & mask; + Py_ssize_t ix; + for (;;) { + ix = dictkeys_get_index(dk, i); + if (ix >= 0) { + PyDictKeyEntry *ep = &ep0[ix]; + assert(ep->me_key != NULL); + assert(PyUnicode_CheckExact(ep->me_key)); + if (ep->me_key == key || + (ep->me_hash == hash && unicode_eq(ep->me_key, key))) { + return ix; + } + } + else if (ix == DKIX_EMPTY) { + return DKIX_EMPTY; + } + perturb >>= PERTURB_SHIFT; + i = mask & (i*5 + perturb + 1); + ix = dictkeys_get_index(dk, i); + if (ix >= 0) { + PyDictKeyEntry *ep = &ep0[ix]; + assert(ep->me_key != NULL); + assert(PyUnicode_CheckExact(ep->me_key)); + if (ep->me_key == key || + (ep->me_hash == hash && unicode_eq(ep->me_key, key))) { + return ix; + } + } + else if (ix == DKIX_EMPTY) { + return DKIX_EMPTY; + } + perturb >>= PERTURB_SHIFT; + i = mask & (i*5 + perturb + 1); + } + Py_UNREACHABLE(); +} + +/* Lookup a string in a (all unicode) dict keys. + * Returns DKIX_ERROR if key is not a string, + * or if the dict keys is not all strings. + * If the keys is present then return the index of key. + * If the key is not present then return DKIX_EMPTY. + */ +Py_ssize_t +_PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key) +{ + DictKeysKind kind = dk->dk_kind; + if (!PyUnicode_CheckExact(key) || kind == DICT_KEYS_GENERAL) { + return DKIX_ERROR; + } + Py_hash_t hash = ((PyASCIIObject *)key)->hash; + if (hash == -1) { + hash = PyUnicode_Type.tp_hash(key); + if (hash == -1) { + PyErr_Clear(); + return DKIX_ERROR; + } + } + return dictkeys_stringlookup(dk, key, hash); +} + /* The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. @@ -756,49 +823,24 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu start: dk = mp->ma_keys; DictKeysKind kind = dk->dk_kind; + if (PyUnicode_CheckExact(key) && kind != DICT_KEYS_GENERAL) { + Py_ssize_t ix = dictkeys_stringlookup(dk, key, hash); + if (ix == DKIX_EMPTY) { + *value_addr = NULL; + } + else if (kind == DICT_KEYS_SPLIT) { + *value_addr = mp->ma_values[ix]; + } + else { + *value_addr = DK_ENTRIES(dk)[ix].me_value; + } + return ix; + } PyDictKeyEntry *ep0 = DK_ENTRIES(dk); size_t mask = DK_MASK(dk); size_t perturb = hash; size_t i = (size_t)hash & mask; Py_ssize_t ix; - if (PyUnicode_CheckExact(key) && kind != DICT_KEYS_GENERAL) { - /* Strings only */ - for (;;) { - ix = dictkeys_get_index(mp->ma_keys, i); - if (ix >= 0) { - PyDictKeyEntry *ep = &ep0[ix]; - assert(ep->me_key != NULL); - assert(PyUnicode_CheckExact(ep->me_key)); - if (ep->me_key == key || - (ep->me_hash == hash && unicode_eq(ep->me_key, key))) { - goto found; - } - } - else if (ix == DKIX_EMPTY) { - *value_addr = NULL; - return DKIX_EMPTY; - } - perturb >>= PERTURB_SHIFT; - i = mask & (i*5 + perturb + 1); - ix = dictkeys_get_index(mp->ma_keys, i); - if (ix >= 0) { - PyDictKeyEntry *ep = &ep0[ix]; - assert(ep->me_key != NULL); - assert(PyUnicode_CheckExact(ep->me_key)); - if (ep->me_key == key || - (ep->me_hash == hash && unicode_eq(ep->me_key, key))) { - goto found; - } - } - else if (ix == DKIX_EMPTY) { - *value_addr = NULL; - return DKIX_EMPTY; - } - perturb >>= PERTURB_SHIFT; - i = mask & (i*5 + perturb + 1); - } - Py_UNREACHABLE(); - } for (;;) { ix = dictkeys_get_index(dk, i); if (ix == DKIX_EMPTY) { @@ -4997,15 +5039,15 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys) static uint32_t next_dict_keys_version = 2; -uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictObject *dict) +uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictKeysObject *dictkeys) { - if (dict->ma_keys->dk_version != 0) { - return dict->ma_keys->dk_version; + if (dictkeys->dk_version != 0) { + return dictkeys->dk_version; } if (next_dict_keys_version == 0) { return 0; } uint32_t v = next_dict_keys_version++; - dict->ma_keys->dk_version = v; + dictkeys->dk_version = v; return v; } diff --git a/Python/specialize.c b/Python/specialize.c index 398524cdb5b..1ab79bf3ea0 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -489,7 +489,7 @@ specialize_module_load_attr( SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_RANGE); return -1; } - uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict); + uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict->ma_keys); if (keys_version == 0) { SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_VERSIONS); return -1; @@ -601,23 +601,19 @@ specialize_dict_access( } // We found an instance with a __dict__. PyDictObject *dict = (PyDictObject *)*dictptr; + PyDictKeysObject *keys = dict->ma_keys; if ((type->tp_flags & Py_TPFLAGS_HEAPTYPE) - && dict->ma_keys == ((PyHeapTypeObject*)type)->ht_cached_keys + && keys == ((PyHeapTypeObject*)type)->ht_cached_keys ) { // Keys are shared assert(PyUnicode_CheckExact(name)); - Py_hash_t hash = PyObject_Hash(name); - if (hash == -1) { - return -1; - } - PyObject *value; - Py_ssize_t index = _Py_dict_lookup(dict, name, hash, &value); + Py_ssize_t index = _PyDictKeys_StringLookup(keys, name); assert (index != DKIX_ERROR); if (index != (uint16_t)index) { SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE); return 0; } - uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict); + uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(keys); if (keys_version == 0) { SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_VERSIONS); return 0; @@ -966,7 +962,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, goto fail; } } - keys_version = _PyDictKeys_GetVersionForCurrentState(owner_dict); + keys_version = _PyDictKeys_GetVersionForCurrentState(owner_dict->ma_keys); if (keys_version == 0) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; @@ -1020,17 +1016,17 @@ _Py_Specialize_LoadGlobal( if (!PyDict_CheckExact(globals)) { goto fail; } - if (((PyDictObject *)globals)->ma_keys->dk_kind != DICT_KEYS_UNICODE) { + PyDictKeysObject * globals_keys = ((PyDictObject *)globals)->ma_keys; + Py_ssize_t index = _PyDictKeys_StringLookup(globals_keys, name); + if (index == DKIX_ERROR) { + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_NON_STRING_OR_SPLIT); goto fail; } - PyObject *value = NULL; - Py_ssize_t index = _PyDict_GetItemHint((PyDictObject *)globals, name, -1, &value); - assert (index != DKIX_ERROR); if (index != DKIX_EMPTY) { if (index != (uint16_t)index) { goto fail; } - uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals); + uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(globals_keys); if (keys_version == 0) { goto fail; } @@ -1042,20 +1038,23 @@ _Py_Specialize_LoadGlobal( if (!PyDict_CheckExact(builtins)) { goto fail; } - if (((PyDictObject *)builtins)->ma_keys->dk_kind != DICT_KEYS_UNICODE) { + PyDictKeysObject * builtin_keys = ((PyDictObject *)builtins)->ma_keys; + index = _PyDictKeys_StringLookup(builtin_keys, name); + if (index == DKIX_ERROR) { + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_NON_STRING_OR_SPLIT); goto fail; } - index = _PyDict_GetItemHint((PyDictObject *)builtins, name, -1, &value); - assert (index != DKIX_ERROR); if (index != (uint16_t)index) { goto fail; } - uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals); + uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState(globals_keys); if (globals_version == 0) { + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; } - uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)builtins); + uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState(builtin_keys); if (builtins_version == 0) { + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; } cache1->module_keys_version = globals_version;