From 7478569d0a7d619d600816f3a75e56922d8ed428 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Tue, 10 Mar 2015 18:07:11 +0100 Subject: erts: Tweak over estimation of hashmap size for binary_to_term Strategy: Calculate an over estimation of heap size that will give such a low probability for overflow, that "it will not happen". Scary assumption 1: Uniformly distributed hash values. Scary assumption 2: Tree size is normally distributed (right?) --- erts/emulator/beam/erl_map.c | 37 ++++++++++++++++++++++++++ erts/emulator/beam/erl_map.h | 1 + erts/emulator/beam/erl_utils.h | 1 + erts/emulator/beam/external.c | 60 ++++++++++++++++++++++-------------------- erts/emulator/beam/utils.c | 11 ++++++++ 5 files changed, 81 insertions(+), 29 deletions(-) diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c index 3ccfc61b08..5eafa69b06 100644 --- a/erts/emulator/beam/erl_map.c +++ b/erts/emulator/beam/erl_map.c @@ -2437,6 +2437,43 @@ int erts_validate_and_sort_map(map_t* mp) return 1; } +/* Really rough estimate of sqrt(x) + * Guaranteed not to be less than sqrt(x) + */ +static int int_sqrt_ceiling(Uint x) +{ + int n; + + if (x <= 2) + return x; + + n = erts_fit_in_bits_uint(x-1); + if (n & 1) { + /* Calc: sqrt(2^n) = 2^(n/2) * sqrt(2) ~= 2^(n/2) * 3 / 2 */ + return (1 << (n/2 - 1)) * 3; + } + else { + /* Calc: sqrt(2^n) = 2^(n/2) */ + return 1 << (n / 2); + } +} + +Uint hashmap_over_estimated_heap_size(Uint n) +{ + /* n is nr of key-value pairs. + Average nr of nodes is about n/3. + Standard deviation is about sqrt(n)/3. + Assuming normal probability distribution, + we overestimate nr of nodes by 14 std.devs, which gives a probability + for overrun of 1.0e-49 (same magnitude as a git SHA1 collision). + */ + Uint nodes = (n + int_sqrt_ceiling(n)*14) / 3; + return (n*2 + /* leaf cons cells */ + n + /* leaf list terms */ + nodes*2); /* headers + parent refs */ +} + + BIF_RETTYPE erts_debug_map_info_1(BIF_ALIST_1) { if (is_hashmap(BIF_ARG_1)) { BIF_RET(hashmap_info(BIF_P,BIF_ARG_1)); diff --git a/erts/emulator/beam/erl_map.h b/erts/emulator/beam/erl_map.h index 39d98b9e03..659295184d 100644 --- a/erts/emulator/beam/erl_map.h +++ b/erts/emulator/beam/erl_map.h @@ -99,6 +99,7 @@ Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value, Uint *upsz, struct ErtsEStack_ *sp); int erts_validate_and_sort_map(map_t* map); +Uint hashmap_over_estimated_heap_size(Uint n); void hashmap_iterator_init(struct ErtsWStack_* s, Eterm node); Eterm* hashmap_iterator_next(struct ErtsWStack_* s); int hashmap_key_hash_cmp(Eterm* ap, Eterm* bp); diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h index c772a750f1..7cb8972e29 100644 --- a/erts/emulator/beam/erl_utils.h +++ b/erts/emulator/beam/erl_utils.h @@ -113,6 +113,7 @@ void erts_silence_warn_unused_result(long unused); int erts_fit_in_bits_int64(Sint64); int erts_fit_in_bits_int32(Sint32); +int erts_fit_in_bits_uint(Uint); int erts_list_length(Eterm); int erts_is_builtin(Eterm, Eterm, int); Uint32 make_broken_hash(Eterm); diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 9bcd9a7a50..bd9ad65086 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -2945,13 +2945,11 @@ struct dec_term_hamt_placeholder struct dec_term_hamt_placeholder* next; Eterm* objp; /* write result here */ Uint size; /* nr of leafs */ - Eterm* leaf_array; - - Eterm _heap_capacity_[1]; + Eterm leafs[1]; }; -#define DEC_TERM_HAMT_PLACEHOLDER_SIZE(SZ) \ - (offsetof(struct dec_term_hamt_placeholder, _heap_capacity_) + (SZ)) +#define DEC_TERM_HAMT_PLACEHOLDER_SIZE \ + (offsetof(struct dec_term_hamt_placeholder, leafs) / sizeof(Eterm)) /* Decode term from external format into *objp. ** On failure return NULL and *hpp will be unchanged. @@ -3646,8 +3644,7 @@ dec_term_atom_common: holder->objp = objp; holder->size = size; - hp += DEC_TERM_HAMT_PLACEHOLDER_SIZE(size); - holder->leaf_array = hp; + hp += DEC_TERM_HAMT_PLACEHOLDER_SIZE; for (n = size; n; n--) { CDR(hp) = (Eterm) COMPRESS_POINTER(next); @@ -3899,30 +3896,33 @@ dec_term_atom_common: maps_list = next; } - while (hamt_list) { - ErtsHeapFactory factory; - int residue; - - factory.p = NULL; - factory.hp = (Eterm*) hamt_list; - factory.hp_end = hamt_list->leaf_array; + /* Iterate through all the hamts and build tree nodes. + */ + if (hamt_list) { + struct dec_term_hamt_placeholder* hamt = hamt_list; + ErtsHeapFactory factory; + + factory.p = NULL; + factory.hp = hp; + /* We assume heap will suffice (see hashmap_over_estimated_heap_size) */ + factory.hp_end = hp + (ERTS_SWORD_MAX / sizeof(Eterm)); + + do { + *hamt->objp = erts_hashmap_from_array(&factory, + hamt->leafs, + hamt->size, + 1); + if (is_non_value(*hamt->objp)) + goto error; - next = (Eterm *) hamt_list->next; - objp = hamt_list->objp; + hamt_list = hamt->next; - *objp = erts_hashmap_from_array(&factory, - hamt_list->leaf_array, - hamt_list->size, - 1); - if (is_non_value(*objp)) - goto error; + /* Yes, we waste a couple of heap words per hamt + for the temporary placeholder */ + *(Eterm*)hamt = make_pos_bignum_header(DEC_TERM_HAMT_PLACEHOLDER_SIZE-1); + } while (hamt_list); - residue = factory.hp_end - factory.hp; - if (residue) { - ASSERT(residue > 0); - *factory.hp = make_pos_bignum_header(residue-1); - } - hamt_list = (struct dec_term_hamt_placeholder*) next; + hp = factory.hp; } if (ctx) { @@ -4292,6 +4292,8 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, return 0; } + + static Sint decoded_size(byte *ep, byte* endp, int internal_tags, B2TContext* ctx) { @@ -4488,7 +4490,7 @@ init_done: if (n <= MAP_SMALL_MAP_LIMIT) { heap_size += 3 + n + 1 + n; } else { - heap_size += DEC_TERM_HAMT_PLACEHOLDER_SIZE(n) + 2*n; + heap_size += hashmap_over_estimated_heap_size(n); } break; case STRING_EXT: diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index 674dddc86f..66f13461a1 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -352,6 +352,17 @@ int erts_fit_in_bits_int32(Sint32 value) return fit_in_bits((Sint64) (Uint32) value, 4); } +int erts_fit_in_bits_uint(Uint value) +{ +#if ERTS_SIZEOF_ETERM == 4 + return fit_in_bits((Sint64) (Uint32) value, 4); +#elif ERTS_SIZEOF_ETERM == 8 + return fit_in_bits(value, 5); +#else +# error "No way, Jose" +#endif +} + int erts_print(int to, void *arg, char *format, ...) { -- cgit v1.2.3