From 7478569d0a7d619d600816f3a75e56922d8ed428 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Tue, 10 Mar 2015 18:07:11 +0100 Subject: erts: Tweak over estimation of hashmap size for binary_to_term Strategy: Calculate an over estimation of heap size that will give such a low probability for overflow, that "it will not happen". Scary assumption 1: Uniformly distributed hash values. Scary assumption 2: Tree size is normally distributed (right?) --- erts/emulator/beam/erl_map.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'erts/emulator/beam/erl_map.c') diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c index 3ccfc61b08..5eafa69b06 100644 --- a/erts/emulator/beam/erl_map.c +++ b/erts/emulator/beam/erl_map.c @@ -2437,6 +2437,43 @@ int erts_validate_and_sort_map(map_t* mp) return 1; } +/* Really rough estimate of sqrt(x) + * Guaranteed not to be less than sqrt(x) + */ +static int int_sqrt_ceiling(Uint x) +{ + int n; + + if (x <= 2) + return x; + + n = erts_fit_in_bits_uint(x-1); + if (n & 1) { + /* Calc: sqrt(2^n) = 2^(n/2) * sqrt(2) ~= 2^(n/2) * 3 / 2 */ + return (1 << (n/2 - 1)) * 3; + } + else { + /* Calc: sqrt(2^n) = 2^(n/2) */ + return 1 << (n / 2); + } +} + +Uint hashmap_over_estimated_heap_size(Uint n) +{ + /* n is nr of key-value pairs. + Average nr of nodes is about n/3. + Standard deviation is about sqrt(n)/3. + Assuming normal probability distribution, + we overestimate nr of nodes by 14 std.devs, which gives a probability + for overrun of 1.0e-49 (same magnitude as a git SHA1 collision). + */ + Uint nodes = (n + int_sqrt_ceiling(n)*14) / 3; + return (n*2 + /* leaf cons cells */ + n + /* leaf list terms */ + nodes*2); /* headers + parent refs */ +} + + BIF_RETTYPE erts_debug_map_info_1(BIF_ALIST_1) { if (is_hashmap(BIF_ARG_1)) { BIF_RET(hashmap_info(BIF_P,BIF_ARG_1)); -- cgit v1.2.3