mirror of https://github.com/python/cpython.git
bpo-40602: Optimize _Py_hashtable for pointer keys (GH-20051)
Optimize _Py_hashtable_get() and _Py_hashtable_get_entry() for pointer keys: * key_size == sizeof(void*) * hash_func == _Py_hashtable_hash_ptr * compare_func == _Py_hashtable_compare_direct Changes: * Add get_func and get_entry_func members to _Py_hashtable_t * Convert _Py_hashtable_get() and _Py_hashtable_get_entry() functions to static inline functions. * Add specialized get and get-entry functions for pointer keys.
This commit is contained in:
parent
74ea6b5a75
commit
7c6e970775
|
@ -76,12 +76,17 @@ typedef struct {
|
|||
|
||||
/* Forward declaration */
|
||||
struct _Py_hashtable_t;
|
||||
typedef struct _Py_hashtable_t _Py_hashtable_t;
|
||||
|
||||
typedef Py_uhash_t (*_Py_hashtable_hash_func) (struct _Py_hashtable_t *ht,
|
||||
typedef Py_uhash_t (*_Py_hashtable_hash_func) (_Py_hashtable_t *ht,
|
||||
const void *pkey);
|
||||
typedef int (*_Py_hashtable_compare_func) (struct _Py_hashtable_t *ht,
|
||||
typedef int (*_Py_hashtable_compare_func) (_Py_hashtable_t *ht,
|
||||
const void *pkey,
|
||||
const _Py_hashtable_entry_t *he);
|
||||
typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *ht,
|
||||
const void *pkey);
|
||||
typedef int (*_Py_hashtable_get_func) (_Py_hashtable_t *ht,
|
||||
const void *pkey, void *data);
|
||||
|
||||
typedef struct {
|
||||
/* allocate a memory block */
|
||||
|
@ -93,18 +98,19 @@ typedef struct {
|
|||
|
||||
|
||||
/* _Py_hashtable: table */
|
||||
|
||||
typedef struct _Py_hashtable_t {
|
||||
struct _Py_hashtable_t {
|
||||
size_t num_buckets;
|
||||
size_t entries; /* Total number of entries in the table. */
|
||||
_Py_slist_t *buckets;
|
||||
size_t key_size;
|
||||
size_t data_size;
|
||||
|
||||
_Py_hashtable_get_func get_func;
|
||||
_Py_hashtable_get_entry_func get_entry_func;
|
||||
_Py_hashtable_hash_func hash_func;
|
||||
_Py_hashtable_compare_func compare_func;
|
||||
_Py_hashtable_allocator_t alloc;
|
||||
} _Py_hashtable_t;
|
||||
};
|
||||
|
||||
/* hash a pointer (void*) */
|
||||
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
|
||||
|
@ -176,10 +182,12 @@ PyAPI_FUNC(int) _Py_hashtable_set(
|
|||
|
||||
Don't call this function directly; use the _Py_HASHTABLE_GET_ENTRY()
|
||||
macro */
|
||||
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
|
||||
_Py_hashtable_t *ht,
|
||||
size_t key_size,
|
||||
const void *pkey);
|
||||
static inline _Py_hashtable_entry_t *
|
||||
_Py_hashtable_get_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
|
||||
{
|
||||
assert(key_size == ht->key_size);
|
||||
return ht->get_entry_func(ht, pkey);
|
||||
}
|
||||
|
||||
#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
|
||||
_Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
|
||||
|
@ -189,12 +197,14 @@ PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
|
|||
exists, return 0 if the entry does not exist.
|
||||
|
||||
Don't call this function directly; use the _Py_HASHTABLE_GET() macro instead */
|
||||
PyAPI_FUNC(int) _Py_hashtable_get(
|
||||
_Py_hashtable_t *ht,
|
||||
size_t key_size,
|
||||
const void *pkey,
|
||||
size_t data_size,
|
||||
void *data);
|
||||
static inline int
|
||||
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
|
||||
size_t data_size, void *data)
|
||||
{
|
||||
assert(key_size == ht->key_size);
|
||||
assert(data_size == ht->data_size);
|
||||
return ht->get_func(ht, pkey, data);
|
||||
}
|
||||
|
||||
#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
|
||||
_Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
|
||||
|
|
|
@ -108,7 +108,6 @@ Py_uhash_t
|
|||
_Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
|
||||
{
|
||||
void *key;
|
||||
|
||||
_Py_HASHTABLE_READ_KEY(ht, pkey, key);
|
||||
return (Py_uhash_t)_Py_HashPointer(key);
|
||||
}
|
||||
|
@ -137,61 +136,6 @@ round_size(size_t s)
|
|||
}
|
||||
|
||||
|
||||
_Py_hashtable_t *
|
||||
_Py_hashtable_new_full(size_t key_size, size_t data_size,
|
||||
size_t init_size,
|
||||
_Py_hashtable_hash_func hash_func,
|
||||
_Py_hashtable_compare_func compare_func,
|
||||
_Py_hashtable_allocator_t *allocator)
|
||||
{
|
||||
_Py_hashtable_t *ht;
|
||||
size_t buckets_size;
|
||||
_Py_hashtable_allocator_t alloc;
|
||||
|
||||
if (allocator == NULL) {
|
||||
alloc.malloc = PyMem_Malloc;
|
||||
alloc.free = PyMem_Free;
|
||||
}
|
||||
else {
|
||||
alloc = *allocator;
|
||||
}
|
||||
|
||||
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
|
||||
if (ht == NULL)
|
||||
return ht;
|
||||
|
||||
ht->num_buckets = round_size(init_size);
|
||||
ht->entries = 0;
|
||||
ht->key_size = key_size;
|
||||
ht->data_size = data_size;
|
||||
|
||||
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
|
||||
ht->buckets = alloc.malloc(buckets_size);
|
||||
if (ht->buckets == NULL) {
|
||||
alloc.free(ht);
|
||||
return NULL;
|
||||
}
|
||||
memset(ht->buckets, 0, buckets_size);
|
||||
|
||||
ht->hash_func = hash_func;
|
||||
ht->compare_func = compare_func;
|
||||
ht->alloc = alloc;
|
||||
return ht;
|
||||
}
|
||||
|
||||
|
||||
_Py_hashtable_t *
|
||||
_Py_hashtable_new(size_t key_size, size_t data_size,
|
||||
_Py_hashtable_hash_func hash_func,
|
||||
_Py_hashtable_compare_func compare_func)
|
||||
{
|
||||
return _Py_hashtable_new_full(key_size, data_size,
|
||||
HASHTABLE_MIN_SIZE,
|
||||
hash_func, compare_func,
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
||||
size_t
|
||||
_Py_hashtable_size(_Py_hashtable_t *ht)
|
||||
{
|
||||
|
@ -251,23 +195,20 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)
|
|||
|
||||
|
||||
_Py_hashtable_entry_t *
|
||||
_Py_hashtable_get_entry(_Py_hashtable_t *ht,
|
||||
size_t key_size, const void *pkey)
|
||||
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *pkey)
|
||||
{
|
||||
Py_uhash_t key_hash;
|
||||
size_t index;
|
||||
_Py_hashtable_entry_t *entry;
|
||||
|
||||
assert(key_size == ht->key_size);
|
||||
|
||||
key_hash = ht->hash_func(ht, pkey);
|
||||
index = key_hash & (ht->num_buckets - 1);
|
||||
|
||||
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
|
||||
if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
|
||||
Py_uhash_t key_hash = ht->hash_func(ht, pkey);
|
||||
size_t index = key_hash & (ht->num_buckets - 1);
|
||||
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
|
||||
while (1) {
|
||||
if (entry == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) {
|
||||
break;
|
||||
}
|
||||
entry = ENTRY_NEXT(entry);
|
||||
}
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
|
@ -324,7 +265,7 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
|
|||
/* Don't write the assertion on a single line because it is interesting
|
||||
to know the duplicated entry if the assertion failed. The entry can
|
||||
be read using a debugger. */
|
||||
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
|
||||
entry = ht->get_entry_func(ht, pkey);
|
||||
assert(entry == NULL);
|
||||
#endif
|
||||
|
||||
|
@ -352,18 +293,62 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
|
|||
|
||||
|
||||
int
|
||||
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
|
||||
size_t data_size, void *data)
|
||||
_Py_hashtable_get_generic(_Py_hashtable_t *ht, const void *pkey, void *data)
|
||||
{
|
||||
_Py_hashtable_entry_t *entry;
|
||||
|
||||
assert(data != NULL);
|
||||
|
||||
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
|
||||
if (entry == NULL)
|
||||
_Py_hashtable_entry_t *entry = ht->get_entry_func(ht, pkey);
|
||||
if (entry != NULL) {
|
||||
ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
ENTRY_READ_PDATA(ht, entry, data_size, data);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Specialized for:
|
||||
// key_size == sizeof(void*)
|
||||
// hash_func == _Py_hashtable_hash_ptr
|
||||
// compare_func == _Py_hashtable_compare_direct
|
||||
_Py_hashtable_entry_t *
|
||||
_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *pkey)
|
||||
{
|
||||
Py_uhash_t key_hash = _Py_hashtable_hash_ptr(ht, pkey);
|
||||
size_t index = key_hash & (ht->num_buckets - 1);
|
||||
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
|
||||
while (1) {
|
||||
if (entry == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (entry->key_hash == key_hash) {
|
||||
const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
|
||||
if (memcmp(pkey, pkey2, sizeof(void*)) == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
entry = ENTRY_NEXT(entry);
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
|
||||
|
||||
// Specialized for:
|
||||
// key_size == sizeof(void*)
|
||||
// hash_func == _Py_hashtable_hash_ptr
|
||||
// compare_func == _Py_hashtable_compare_direct
|
||||
int
|
||||
_Py_hashtable_get_ptr(_Py_hashtable_t *ht, const void *pkey, void *data)
|
||||
{
|
||||
assert(data != NULL);
|
||||
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry_ptr(ht, pkey);
|
||||
if (entry != NULL) {
|
||||
ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -454,6 +439,70 @@ hashtable_rehash(_Py_hashtable_t *ht)
|
|||
}
|
||||
|
||||
|
||||
_Py_hashtable_t *
|
||||
_Py_hashtable_new_full(size_t key_size, size_t data_size,
|
||||
size_t init_size,
|
||||
_Py_hashtable_hash_func hash_func,
|
||||
_Py_hashtable_compare_func compare_func,
|
||||
_Py_hashtable_allocator_t *allocator)
|
||||
{
|
||||
_Py_hashtable_t *ht;
|
||||
size_t buckets_size;
|
||||
_Py_hashtable_allocator_t alloc;
|
||||
|
||||
if (allocator == NULL) {
|
||||
alloc.malloc = PyMem_Malloc;
|
||||
alloc.free = PyMem_Free;
|
||||
}
|
||||
else {
|
||||
alloc = *allocator;
|
||||
}
|
||||
|
||||
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
|
||||
if (ht == NULL)
|
||||
return ht;
|
||||
|
||||
ht->num_buckets = round_size(init_size);
|
||||
ht->entries = 0;
|
||||
ht->key_size = key_size;
|
||||
ht->data_size = data_size;
|
||||
|
||||
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
|
||||
ht->buckets = alloc.malloc(buckets_size);
|
||||
if (ht->buckets == NULL) {
|
||||
alloc.free(ht);
|
||||
return NULL;
|
||||
}
|
||||
memset(ht->buckets, 0, buckets_size);
|
||||
|
||||
ht->get_func = _Py_hashtable_get_generic;
|
||||
ht->get_entry_func = _Py_hashtable_get_entry_generic;
|
||||
ht->hash_func = hash_func;
|
||||
ht->compare_func = compare_func;
|
||||
ht->alloc = alloc;
|
||||
if (ht->key_size == sizeof(void*)
|
||||
&& ht->hash_func == _Py_hashtable_hash_ptr
|
||||
&& ht->compare_func == _Py_hashtable_compare_direct)
|
||||
{
|
||||
ht->get_func = _Py_hashtable_get_ptr;
|
||||
ht->get_entry_func = _Py_hashtable_get_entry_ptr;
|
||||
}
|
||||
return ht;
|
||||
}
|
||||
|
||||
|
||||
_Py_hashtable_t *
|
||||
_Py_hashtable_new(size_t key_size, size_t data_size,
|
||||
_Py_hashtable_hash_func hash_func,
|
||||
_Py_hashtable_compare_func compare_func)
|
||||
{
|
||||
return _Py_hashtable_new_full(key_size, data_size,
|
||||
HASHTABLE_MIN_SIZE,
|
||||
hash_func, compare_func,
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
_Py_hashtable_clear(_Py_hashtable_t *ht)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue