aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2015-03-10 18:07:11 +0100
committerBjörn-Egil Dahlberg <[email protected]>2015-03-12 19:15:33 +0100
commit7478569d0a7d619d600816f3a75e56922d8ed428 (patch)
tree7c2a3783fd87c3b720464f748cf57a25d07ade6d
parent01e843722aa39b3411d349c6fbea80ad87a6e9ef (diff)
downloadotp-7478569d0a7d619d600816f3a75e56922d8ed428.tar.gz
otp-7478569d0a7d619d600816f3a75e56922d8ed428.tar.bz2
otp-7478569d0a7d619d600816f3a75e56922d8ed428.zip
erts: Tweak over estimation of hashmap size for binary_to_term
Strategy: Calculate an over estimation of heap size that will give such a low probability for overflow, that "it will not happen". Scary assumption 1: Uniformly distributed hash values. Scary assumption 2: Tree size is normally distributed (right?)
-rw-r--r--erts/emulator/beam/erl_map.c37
-rw-r--r--erts/emulator/beam/erl_map.h1
-rw-r--r--erts/emulator/beam/erl_utils.h1
-rw-r--r--erts/emulator/beam/external.c60
-rw-r--r--erts/emulator/beam/utils.c11
5 files changed, 81 insertions, 29 deletions
diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c
index 3ccfc61b08..5eafa69b06 100644
--- a/erts/emulator/beam/erl_map.c
+++ b/erts/emulator/beam/erl_map.c
@@ -2437,6 +2437,43 @@ int erts_validate_and_sort_map(map_t* mp)
return 1;
}
+/* Really rough estimate of sqrt(x)
+ * Guaranteed not to be less than sqrt(x)
+ */
+static int int_sqrt_ceiling(Uint x)
+{
+ int n;
+
+ if (x <= 2)
+ return x;
+
+ n = erts_fit_in_bits_uint(x-1);
+ if (n & 1) {
+ /* Calc: sqrt(2^n) = 2^(n/2) * sqrt(2) ~= 2^(n/2) * 3 / 2 */
+ return (1 << (n/2 - 1)) * 3;
+ }
+ else {
+ /* Calc: sqrt(2^n) = 2^(n/2) */
+ return 1 << (n / 2);
+ }
+}
+
+Uint hashmap_over_estimated_heap_size(Uint n)
+{
+ /* n is nr of key-value pairs.
+ Average nr of nodes is about n/3.
+ Standard deviation is about sqrt(n)/3.
+ Assuming normal probability distribution,
+ we overestimate nr of nodes by 14 std.devs, which gives a probability
+ for overrun of 1.0e-49 (same magnitude as a git SHA1 collision).
+ */
+ Uint nodes = (n + int_sqrt_ceiling(n)*14) / 3;
+ return (n*2 + /* leaf cons cells */
+ n + /* leaf list terms */
+ nodes*2); /* headers + parent refs */
+}
+
+
BIF_RETTYPE erts_debug_map_info_1(BIF_ALIST_1) {
if (is_hashmap(BIF_ARG_1)) {
BIF_RET(hashmap_info(BIF_P,BIF_ARG_1));
diff --git a/erts/emulator/beam/erl_map.h b/erts/emulator/beam/erl_map.h
index 39d98b9e03..659295184d 100644
--- a/erts/emulator/beam/erl_map.h
+++ b/erts/emulator/beam/erl_map.h
@@ -99,6 +99,7 @@ Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value,
Uint *upsz, struct ErtsEStack_ *sp);
int erts_validate_and_sort_map(map_t* map);
+Uint hashmap_over_estimated_heap_size(Uint n);
void hashmap_iterator_init(struct ErtsWStack_* s, Eterm node);
Eterm* hashmap_iterator_next(struct ErtsWStack_* s);
int hashmap_key_hash_cmp(Eterm* ap, Eterm* bp);
diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h
index c772a750f1..7cb8972e29 100644
--- a/erts/emulator/beam/erl_utils.h
+++ b/erts/emulator/beam/erl_utils.h
@@ -113,6 +113,7 @@ void erts_silence_warn_unused_result(long unused);
int erts_fit_in_bits_int64(Sint64);
int erts_fit_in_bits_int32(Sint32);
+int erts_fit_in_bits_uint(Uint);
int erts_list_length(Eterm);
int erts_is_builtin(Eterm, Eterm, int);
Uint32 make_broken_hash(Eterm);
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index 9bcd9a7a50..bd9ad65086 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -2945,13 +2945,11 @@ struct dec_term_hamt_placeholder
struct dec_term_hamt_placeholder* next;
Eterm* objp; /* write result here */
Uint size; /* nr of leafs */
- Eterm* leaf_array;
-
- Eterm _heap_capacity_[1];
+ Eterm leafs[1];
};
-#define DEC_TERM_HAMT_PLACEHOLDER_SIZE(SZ) \
- (offsetof(struct dec_term_hamt_placeholder, _heap_capacity_) + (SZ))
+#define DEC_TERM_HAMT_PLACEHOLDER_SIZE \
+ (offsetof(struct dec_term_hamt_placeholder, leafs) / sizeof(Eterm))
/* Decode term from external format into *objp.
** On failure return NULL and *hpp will be unchanged.
@@ -3646,8 +3644,7 @@ dec_term_atom_common:
holder->objp = objp;
holder->size = size;
- hp += DEC_TERM_HAMT_PLACEHOLDER_SIZE(size);
- holder->leaf_array = hp;
+ hp += DEC_TERM_HAMT_PLACEHOLDER_SIZE;
for (n = size; n; n--) {
CDR(hp) = (Eterm) COMPRESS_POINTER(next);
@@ -3899,30 +3896,33 @@ dec_term_atom_common:
maps_list = next;
}
- while (hamt_list) {
- ErtsHeapFactory factory;
- int residue;
-
- factory.p = NULL;
- factory.hp = (Eterm*) hamt_list;
- factory.hp_end = hamt_list->leaf_array;
+ /* Iterate through all the hamts and build tree nodes.
+ */
+ if (hamt_list) {
+ struct dec_term_hamt_placeholder* hamt = hamt_list;
+ ErtsHeapFactory factory;
+
+ factory.p = NULL;
+ factory.hp = hp;
+ /* We assume heap will suffice (see hashmap_over_estimated_heap_size) */
+ factory.hp_end = hp + (ERTS_SWORD_MAX / sizeof(Eterm));
+
+ do {
+ *hamt->objp = erts_hashmap_from_array(&factory,
+ hamt->leafs,
+ hamt->size,
+ 1);
+ if (is_non_value(*hamt->objp))
+ goto error;
- next = (Eterm *) hamt_list->next;
- objp = hamt_list->objp;
+ hamt_list = hamt->next;
- *objp = erts_hashmap_from_array(&factory,
- hamt_list->leaf_array,
- hamt_list->size,
- 1);
- if (is_non_value(*objp))
- goto error;
+ /* Yes, we waste a couple of heap words per hamt
+ for the temporary placeholder */
+ *(Eterm*)hamt = make_pos_bignum_header(DEC_TERM_HAMT_PLACEHOLDER_SIZE-1);
+ } while (hamt_list);
- residue = factory.hp_end - factory.hp;
- if (residue) {
- ASSERT(residue > 0);
- *factory.hp = make_pos_bignum_header(residue-1);
- }
- hamt_list = (struct dec_term_hamt_placeholder*) next;
+ hp = factory.hp;
}
if (ctx) {
@@ -4292,6 +4292,8 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
return 0;
}
+
+
static Sint
decoded_size(byte *ep, byte* endp, int internal_tags, B2TContext* ctx)
{
@@ -4488,7 +4490,7 @@ init_done:
if (n <= MAP_SMALL_MAP_LIMIT) {
heap_size += 3 + n + 1 + n;
} else {
- heap_size += DEC_TERM_HAMT_PLACEHOLDER_SIZE(n) + 2*n;
+ heap_size += hashmap_over_estimated_heap_size(n);
}
break;
case STRING_EXT:
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index 674dddc86f..66f13461a1 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -352,6 +352,17 @@ int erts_fit_in_bits_int32(Sint32 value)
return fit_in_bits((Sint64) (Uint32) value, 4);
}
+int erts_fit_in_bits_uint(Uint value)
+{
+#if ERTS_SIZEOF_ETERM == 4
+ return fit_in_bits((Sint64) (Uint32) value, 4);
+#elif ERTS_SIZEOF_ETERM == 8
+ return fit_in_bits(value, 5);
+#else
+# error "No way, Jose"
+#endif
+}
+
int
erts_print(int to, void *arg, char *format, ...)
{