From 5ad0c86aa11af73950804cadc284b2242684a231 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Fri, 9 Aug 2019 09:04:46 +0200 Subject: erts: Change the default hash implementation to use fibonacci hash Fibonacci hashing is a variant of multiplicative hashing that uses a constant based on the golden ratio. --- erts/emulator/beam/export.c | 2 +- erts/emulator/beam/hash.c | 96 +++++++++++++++++-------------------------- erts/emulator/beam/hash.h | 46 +++++++++++++++++---- erts/emulator/beam/register.c | 4 +- 4 files changed, 78 insertions(+), 70 deletions(-) diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c index 946ffeffb8..c17fb436e2 100644 --- a/erts/emulator/beam/export.c +++ b/erts/emulator/beam/export.c @@ -218,7 +218,7 @@ erts_find_export_entry(Eterm m, Eterm f, unsigned int a, ErtsCodeIndex code_ix) int ix; HashBucket* b; - ix = hval % export_tables[code_ix].htable.size; + ix = hash_get_slot(&export_tables[code_ix].htable, hval); b = export_tables[code_ix].htable.bucket[ix]; /* diff --git a/erts/emulator/beam/hash.c b/erts/emulator/beam/hash.c index cc816ac163..cdab53b3f2 100644 --- a/erts/emulator/beam/hash.c +++ b/erts/emulator/beam/hash.c @@ -29,38 +29,20 @@ #include "sys.h" #include "hash.h" -/* -** List of sizes (all are primes) -*/ -static const int h_size_table[] = { - 2, 5, 11, 23, 47, 97, 197, 397, 797, /* double upto here */ - 1201, 1597, - 2411, 3203, - 4813, 6421, - 9643, 12853, - 19289, 25717, - 51437, - 102877, - 205759, - 411527, - 823117, - 1646237, - 3292489, - 6584983, - 13169977, - 26339969, - 52679969, - -1 -}; - /* ** Get info about hash ** */ +#define MAX_SHIFT (ERTS_SIZEOF_TERM * 8) + +static int hash_get_slots(Hash *h) { + return UWORD_CONSTANT(1) << (MAX_SHIFT - h->shift); +} + void hash_get_info(HashInfo *hi, Hash *h) { - int size = h->size; + int size = hash_get_slots(h); int i; int max_depth = 0; int objects = 0; @@ -84,7 +66,7 @@ void hash_get_info(HashInfo *hi, Hash *h) ASSERT(objects == h->nobjs); hi->name = h->name; - hi->size = h->size; + hi->size = hash_get_slots(h); hi->used = used; hi->objs = h->nobjs; hi->depth = max_depth; @@ -118,15 +100,15 @@ hash_table_sz(Hash *h) int i; for(i=0;h->name[i];i++); i++; - return sizeof(Hash) + h->size*sizeof(HashBucket*) + i; + return sizeof(Hash) + hash_get_slots(h)*sizeof(HashBucket*) + i; } static ERTS_INLINE void set_thresholds(Hash* h) { - h->grow_threshold = (8*h->size)/5; /* grow at 160% load */ - if (h->size_ix > h->min_size_ix) - h->shrink_threshold = h->size / 5; /* shrink at 20% load */ + h->grow_threshold = (8*hash_get_slots(h))/5; /* grow at 160% load */ + if (h->shift < h->max_shift) + h->shrink_threshold = hash_get_slots(h) / 5; /* shrink at 20% load */ else h->shrink_threshold = -1; /* never shrink below inital size */ } @@ -138,29 +120,27 @@ static ERTS_INLINE void set_thresholds(Hash* h) Hash* hash_init(int type, Hash* h, char* name, int size, HashFunctions fun) { int sz; - int ix = 0; + int shift = 1; h->meta_alloc_type = type; - while (h_size_table[ix] != -1 && h_size_table[ix] < size) - ix++; - if (h_size_table[ix] == -1) - return NULL; - - size = h_size_table[ix]; - sz = size*sizeof(HashBucket*); - - h->bucket = (HashBucket**) fun.meta_alloc(h->meta_alloc_type, sz); + while ((UWORD_CONSTANT(1) << shift) < size) + shift++; - memzero(h->bucket, sz); h->is_allocated = 0; h->name = name; h->fun = fun; - h->size = size; - h->size_ix = ix; - h->min_size_ix = ix; + h->shift = MAX_SHIFT - shift; + h->max_shift = h->shift; h->nobjs = 0; set_thresholds(h); + + sz = hash_get_slots(h) * sizeof(HashBucket*); + h->bucket = (HashBucket**) fun.meta_alloc(h->meta_alloc_type, sz); + memzero(h->bucket, sz); + + ASSERT(h->shift > 0 && h->shift < 64); + return h; } @@ -183,7 +163,7 @@ Hash* hash_new(int type, char* name, int size, HashFunctions fun) */ void hash_delete(Hash* h) { - int old_size = h->size; + int old_size = hash_get_slots(h); int i; for (i = 0; i < old_size; i++) { @@ -206,22 +186,20 @@ void hash_delete(Hash* h) static void rehash(Hash* h, int grow) { int sz; - int old_size = h->size; + int old_size = hash_get_slots(h); HashBucket** new_bucket; int i; if (grow) { - if ((h_size_table[h->size_ix+1]) == -1) - return; - h->size_ix++; + h->shift--; } else { - if (h->size_ix == 0) + if (h->shift == h->max_shift) return; - h->size_ix--; + h->shift++; } - h->size = h_size_table[h->size_ix]; - sz = h->size*sizeof(HashBucket*); + + sz = hash_get_slots(h)*sizeof(HashBucket*); new_bucket = (HashBucket **) h->fun.meta_alloc(h->meta_alloc_type, sz); memzero(new_bucket, sz); @@ -230,7 +208,7 @@ static void rehash(Hash* h, int grow) HashBucket* b = h->bucket[i]; while (b != (HashBucket*) 0) { HashBucket* b_next = b->next; - int ix = b->hvalue % h->size; + Uint ix = hash_get_slot(h, b->hvalue); b->next = new_bucket[ix]; new_bucket[ix] = b; b = b_next; @@ -248,7 +226,7 @@ static void rehash(Hash* h, int grow) void* hash_get(Hash* h, void* tmpl) { HashValue hval = h->fun.hash(tmpl); - int ix = hval % h->size; + int ix = hash_get_slot(h, hval); HashBucket* b = h->bucket[ix]; while(b != (HashBucket*) 0) { @@ -265,7 +243,7 @@ void* hash_get(Hash* h, void* tmpl) void* hash_put(Hash* h, void* tmpl) { HashValue hval = h->fun.hash(tmpl); - int ix = hval % h->size; + Uint ix = hash_get_slot(h, hval); HashBucket* b = h->bucket[ix]; while(b != (HashBucket*) 0) { @@ -291,7 +269,7 @@ void* hash_put(Hash* h, void* tmpl) void* hash_erase(Hash* h, void* tmpl) { HashValue hval = h->fun.hash(tmpl); - int ix = hval % h->size; + Uint ix = hash_get_slot(h, hval); HashBucket* b = h->bucket[ix]; HashBucket* prev = 0; @@ -323,7 +301,7 @@ void * hash_remove(Hash *h, void *tmpl) { HashValue hval = h->fun.hash(tmpl); - int ix = hval % h->size; + Uint ix = hash_get_slot(h, hval); HashBucket *b = h->bucket[ix]; HashBucket *prev = NULL; @@ -347,7 +325,7 @@ void hash_foreach(Hash* h, HFOREACH_FUN func, void *func_arg2) { int i; - for (i = 0; i < h->size; i++) { + for (i = 0; i < hash_get_slots(h); i++) { HashBucket* b = h->bucket[i]; while(b != (HashBucket*) 0) { (*func)((void *) b, func_arg2); diff --git a/erts/emulator/beam/hash.h b/erts/emulator/beam/hash.h index 38b401548a..cbd75f3025 100644 --- a/erts/emulator/beam/hash.h +++ b/erts/emulator/beam/hash.h @@ -18,16 +18,16 @@ * %CopyrightEnd% */ -/* -** General hash functions -** -*/ +/** + * General hash functions + * + **/ #ifndef __HASH_H__ #define __HASH_H__ #include "sys.h" -typedef unsigned long HashValue; +typedef UWord HashValue; typedef struct hash Hash; typedef int (*HCMP_FUN)(void*, void*); @@ -76,11 +76,10 @@ struct hash int is_allocated; /* 0 iff hash structure is on stack or is static */ int meta_alloc_type; /* argument to pass to meta_alloc and meta_free */ char* name; /* Table name (static string, for debugging) */ - int size; /* Number of slots */ + int shift; /* How much to shift the hash value */ + int max_shift; /* Never shift more than this value */ int shrink_threshold; int grow_threshold; - int size_ix; /* Size index in size table */ - int min_size_ix; /* Never shrink table smaller than this */ int nobjs; /* Number of objects in table */ HashBucket** bucket; /* Vector of bucket pointers (objects) */ }; @@ -99,4 +98,35 @@ void* hash_erase(Hash*, void*); void* hash_remove(Hash*, void*); void hash_foreach(Hash*, HFOREACH_FUN, void *); +ERTS_GLB_INLINE Uint hash_get_slot(Hash *h, HashValue hv); + +#if ERTS_GLB_INLINE_INCL_FUNC_DEF + +ERTS_GLB_INLINE Uint +hash_get_slot(Hash *h, HashValue hv) +{ + /* This slot mapping function uses fibonacci hashing in order to + * protect itself against a very bad hash function. This is not + * a hash function, so the user of hash.h should still spend time + * to figure out a good hash function for its data. + * + * See https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/ + * for some thoughts and ideas about fibonacci hashing. + */ + + /* This is not strictly part of the fibonacci hashing algorithm + * but it does help to spread the values of the mapping function better. + */ + hv ^= hv >> h->shift; +#ifdef ARCH_64 + /* 2^64 / 1.61803398875 = 11400714819323198485.... */ + return (UWORD_CONSTANT(11400714819323198485) * hv) >> h->shift; +#else + /* 2^32 / 1.61803398875 = 2654435769.... */ + return (UWORD_CONSTANT(2654435769) * hv) >> h->shift; +#endif +} + +#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */ + #endif diff --git a/erts/emulator/beam/register.c b/erts/emulator/beam/register.c index d69632d8c8..edf1e5b0cb 100644 --- a/erts/emulator/beam/register.c +++ b/erts/emulator/beam/register.c @@ -279,7 +279,7 @@ erts_whereis_name_to_id(Process *c_p, Eterm name) erts_proc_lock(c_p, ERTS_PROC_LOCK_MAIN); hval = REG_HASH(name); - ix = hval % process_reg.size; + ix = hash_get_slot(&process_reg, hval); b = process_reg.bucket[ix]; /* @@ -343,7 +343,7 @@ erts_whereis_name(Process *c_p, */ hval = REG_HASH(name); - ix = hval % process_reg.size; + ix = hash_get_slot(&process_reg, hval); b = process_reg.bucket[ix]; /* -- cgit v1.2.3