aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLukas Larsson <[email protected]>2019-08-09 09:04:46 +0200
committerLukas Larsson <[email protected]>2019-08-09 15:21:42 +0200
commit5ad0c86aa11af73950804cadc284b2242684a231 (patch)
tree6689ebf6d24cc3aad8b30e2fea65a81c79ff6fba
parent0cf06f07be27de4219743df3098c8cf66b3848c2 (diff)
downloadotp-5ad0c86aa11af73950804cadc284b2242684a231.tar.gz
otp-5ad0c86aa11af73950804cadc284b2242684a231.tar.bz2
otp-5ad0c86aa11af73950804cadc284b2242684a231.zip
erts: Change the default hash implementation to use fibonacci hash
Fibonacci hashing is a variant of multiplicative hashing that uses a constant based on the golden ratio.
-rw-r--r--erts/emulator/beam/export.c2
-rw-r--r--erts/emulator/beam/hash.c96
-rw-r--r--erts/emulator/beam/hash.h46
-rw-r--r--erts/emulator/beam/register.c4
4 files changed, 78 insertions, 70 deletions
diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c
index 946ffeffb8..c17fb436e2 100644
--- a/erts/emulator/beam/export.c
+++ b/erts/emulator/beam/export.c
@@ -218,7 +218,7 @@ erts_find_export_entry(Eterm m, Eterm f, unsigned int a, ErtsCodeIndex code_ix)
int ix;
HashBucket* b;
- ix = hval % export_tables[code_ix].htable.size;
+ ix = hash_get_slot(&export_tables[code_ix].htable, hval);
b = export_tables[code_ix].htable.bucket[ix];
/*
diff --git a/erts/emulator/beam/hash.c b/erts/emulator/beam/hash.c
index cc816ac163..cdab53b3f2 100644
--- a/erts/emulator/beam/hash.c
+++ b/erts/emulator/beam/hash.c
@@ -30,37 +30,19 @@
#include "hash.h"
/*
-** List of sizes (all are primes)
-*/
-static const int h_size_table[] = {
- 2, 5, 11, 23, 47, 97, 197, 397, 797, /* double upto here */
- 1201, 1597,
- 2411, 3203,
- 4813, 6421,
- 9643, 12853,
- 19289, 25717,
- 51437,
- 102877,
- 205759,
- 411527,
- 823117,
- 1646237,
- 3292489,
- 6584983,
- 13169977,
- 26339969,
- 52679969,
- -1
-};
-
-/*
** Get info about hash
**
*/
+#define MAX_SHIFT (ERTS_SIZEOF_TERM * 8)
+
+static int hash_get_slots(Hash *h) {
+ return UWORD_CONSTANT(1) << (MAX_SHIFT - h->shift);
+}
+
void hash_get_info(HashInfo *hi, Hash *h)
{
- int size = h->size;
+ int size = hash_get_slots(h);
int i;
int max_depth = 0;
int objects = 0;
@@ -84,7 +66,7 @@ void hash_get_info(HashInfo *hi, Hash *h)
ASSERT(objects == h->nobjs);
hi->name = h->name;
- hi->size = h->size;
+ hi->size = hash_get_slots(h);
hi->used = used;
hi->objs = h->nobjs;
hi->depth = max_depth;
@@ -118,15 +100,15 @@ hash_table_sz(Hash *h)
int i;
for(i=0;h->name[i];i++);
i++;
- return sizeof(Hash) + h->size*sizeof(HashBucket*) + i;
+ return sizeof(Hash) + hash_get_slots(h)*sizeof(HashBucket*) + i;
}
static ERTS_INLINE void set_thresholds(Hash* h)
{
- h->grow_threshold = (8*h->size)/5; /* grow at 160% load */
- if (h->size_ix > h->min_size_ix)
- h->shrink_threshold = h->size / 5; /* shrink at 20% load */
+ h->grow_threshold = (8*hash_get_slots(h))/5; /* grow at 160% load */
+ if (h->shift < h->max_shift)
+ h->shrink_threshold = hash_get_slots(h) / 5; /* shrink at 20% load */
else
h->shrink_threshold = -1; /* never shrink below inital size */
}
@@ -138,29 +120,27 @@ static ERTS_INLINE void set_thresholds(Hash* h)
Hash* hash_init(int type, Hash* h, char* name, int size, HashFunctions fun)
{
int sz;
- int ix = 0;
+ int shift = 1;
h->meta_alloc_type = type;
- while (h_size_table[ix] != -1 && h_size_table[ix] < size)
- ix++;
- if (h_size_table[ix] == -1)
- return NULL;
-
- size = h_size_table[ix];
- sz = size*sizeof(HashBucket*);
-
- h->bucket = (HashBucket**) fun.meta_alloc(h->meta_alloc_type, sz);
+ while ((UWORD_CONSTANT(1) << shift) < size)
+ shift++;
- memzero(h->bucket, sz);
h->is_allocated = 0;
h->name = name;
h->fun = fun;
- h->size = size;
- h->size_ix = ix;
- h->min_size_ix = ix;
+ h->shift = MAX_SHIFT - shift;
+ h->max_shift = h->shift;
h->nobjs = 0;
set_thresholds(h);
+
+ sz = hash_get_slots(h) * sizeof(HashBucket*);
+ h->bucket = (HashBucket**) fun.meta_alloc(h->meta_alloc_type, sz);
+ memzero(h->bucket, sz);
+
+ ASSERT(h->shift > 0 && h->shift < 64);
+
return h;
}
@@ -183,7 +163,7 @@ Hash* hash_new(int type, char* name, int size, HashFunctions fun)
*/
void hash_delete(Hash* h)
{
- int old_size = h->size;
+ int old_size = hash_get_slots(h);
int i;
for (i = 0; i < old_size; i++) {
@@ -206,22 +186,20 @@ void hash_delete(Hash* h)
static void rehash(Hash* h, int grow)
{
int sz;
- int old_size = h->size;
+ int old_size = hash_get_slots(h);
HashBucket** new_bucket;
int i;
if (grow) {
- if ((h_size_table[h->size_ix+1]) == -1)
- return;
- h->size_ix++;
+ h->shift--;
}
else {
- if (h->size_ix == 0)
+ if (h->shift == h->max_shift)
return;
- h->size_ix--;
+ h->shift++;
}
- h->size = h_size_table[h->size_ix];
- sz = h->size*sizeof(HashBucket*);
+
+ sz = hash_get_slots(h)*sizeof(HashBucket*);
new_bucket = (HashBucket **) h->fun.meta_alloc(h->meta_alloc_type, sz);
memzero(new_bucket, sz);
@@ -230,7 +208,7 @@ static void rehash(Hash* h, int grow)
HashBucket* b = h->bucket[i];
while (b != (HashBucket*) 0) {
HashBucket* b_next = b->next;
- int ix = b->hvalue % h->size;
+ Uint ix = hash_get_slot(h, b->hvalue);
b->next = new_bucket[ix];
new_bucket[ix] = b;
b = b_next;
@@ -248,7 +226,7 @@ static void rehash(Hash* h, int grow)
void* hash_get(Hash* h, void* tmpl)
{
HashValue hval = h->fun.hash(tmpl);
- int ix = hval % h->size;
+ int ix = hash_get_slot(h, hval);
HashBucket* b = h->bucket[ix];
while(b != (HashBucket*) 0) {
@@ -265,7 +243,7 @@ void* hash_get(Hash* h, void* tmpl)
void* hash_put(Hash* h, void* tmpl)
{
HashValue hval = h->fun.hash(tmpl);
- int ix = hval % h->size;
+ Uint ix = hash_get_slot(h, hval);
HashBucket* b = h->bucket[ix];
while(b != (HashBucket*) 0) {
@@ -291,7 +269,7 @@ void* hash_put(Hash* h, void* tmpl)
void* hash_erase(Hash* h, void* tmpl)
{
HashValue hval = h->fun.hash(tmpl);
- int ix = hval % h->size;
+ Uint ix = hash_get_slot(h, hval);
HashBucket* b = h->bucket[ix];
HashBucket* prev = 0;
@@ -323,7 +301,7 @@ void *
hash_remove(Hash *h, void *tmpl)
{
HashValue hval = h->fun.hash(tmpl);
- int ix = hval % h->size;
+ Uint ix = hash_get_slot(h, hval);
HashBucket *b = h->bucket[ix];
HashBucket *prev = NULL;
@@ -347,7 +325,7 @@ void hash_foreach(Hash* h, HFOREACH_FUN func, void *func_arg2)
{
int i;
- for (i = 0; i < h->size; i++) {
+ for (i = 0; i < hash_get_slots(h); i++) {
HashBucket* b = h->bucket[i];
while(b != (HashBucket*) 0) {
(*func)((void *) b, func_arg2);
diff --git a/erts/emulator/beam/hash.h b/erts/emulator/beam/hash.h
index 38b401548a..cbd75f3025 100644
--- a/erts/emulator/beam/hash.h
+++ b/erts/emulator/beam/hash.h
@@ -18,16 +18,16 @@
* %CopyrightEnd%
*/
-/*
-** General hash functions
-**
-*/
+/**
+ * General hash functions
+ *
+ **/
#ifndef __HASH_H__
#define __HASH_H__
#include "sys.h"
-typedef unsigned long HashValue;
+typedef UWord HashValue;
typedef struct hash Hash;
typedef int (*HCMP_FUN)(void*, void*);
@@ -76,11 +76,10 @@ struct hash
int is_allocated; /* 0 iff hash structure is on stack or is static */
int meta_alloc_type; /* argument to pass to meta_alloc and meta_free */
char* name; /* Table name (static string, for debugging) */
- int size; /* Number of slots */
+ int shift; /* How much to shift the hash value */
+ int max_shift; /* Never shift more than this value */
int shrink_threshold;
int grow_threshold;
- int size_ix; /* Size index in size table */
- int min_size_ix; /* Never shrink table smaller than this */
int nobjs; /* Number of objects in table */
HashBucket** bucket; /* Vector of bucket pointers (objects) */
};
@@ -99,4 +98,35 @@ void* hash_erase(Hash*, void*);
void* hash_remove(Hash*, void*);
void hash_foreach(Hash*, HFOREACH_FUN, void *);
+ERTS_GLB_INLINE Uint hash_get_slot(Hash *h, HashValue hv);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE Uint
+hash_get_slot(Hash *h, HashValue hv)
+{
+ /* This slot mapping function uses fibonacci hashing in order to
+ * protect itself against a very bad hash function. This is not
+ * a hash function, so the user of hash.h should still spend time
+ * to figure out a good hash function for its data.
+ *
+ * See https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+ * for some thoughts and ideas about fibonacci hashing.
+ */
+
+ /* This is not strictly part of the fibonacci hashing algorithm
+ * but it does help to spread the values of the mapping function better.
+ */
+ hv ^= hv >> h->shift;
+#ifdef ARCH_64
+ /* 2^64 / 1.61803398875 = 11400714819323198485.... */
+ return (UWORD_CONSTANT(11400714819323198485) * hv) >> h->shift;
+#else
+ /* 2^32 / 1.61803398875 = 2654435769.... */
+ return (UWORD_CONSTANT(2654435769) * hv) >> h->shift;
+#endif
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
#endif
diff --git a/erts/emulator/beam/register.c b/erts/emulator/beam/register.c
index d69632d8c8..edf1e5b0cb 100644
--- a/erts/emulator/beam/register.c
+++ b/erts/emulator/beam/register.c
@@ -279,7 +279,7 @@ erts_whereis_name_to_id(Process *c_p, Eterm name)
erts_proc_lock(c_p, ERTS_PROC_LOCK_MAIN);
hval = REG_HASH(name);
- ix = hval % process_reg.size;
+ ix = hash_get_slot(&process_reg, hval);
b = process_reg.bucket[ix];
/*
@@ -343,7 +343,7 @@ erts_whereis_name(Process *c_p,
*/
hval = REG_HASH(name);
- ix = hval % process_reg.size;
+ ix = hash_get_slot(&process_reg, hval);
b = process_reg.bucket[ix];
/*