diff options
-rw-r--r-- | erts/emulator/beam/erl_bif_info.c | 20 | ||||
-rw-r--r-- | erts/emulator/beam/erl_map.c | 27 | ||||
-rw-r--r-- | erts/emulator/test/map_SUITE.erl | 71 | ||||
-rw-r--r-- | erts/emulator/test/nif_SUITE_data/nif_SUITE.c | 6 |
4 files changed, 111 insertions, 13 deletions
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index d750e34be3..0774ab51af 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -3597,6 +3597,26 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1) BIF_RET(erts_debug_reader_groups_map(BIF_P, (int) groups)); } + else if (ERTS_IS_ATOM_STR("internal_hash", tp[1])) { + Uint hash = (Uint) make_internal_hash(tp[2]); + Uint hsz = 0; + Eterm* hp; + erts_bld_uint(NULL, &hsz, hash); + hp = HAlloc(BIF_P,hsz); + return erts_bld_uint(&hp, NULL, hash); + } + else if (ERTS_IS_ATOM_STR("atom", tp[1])) { + Uint ix; + if (!term_to_Uint(tp[2], &ix)) + BIF_ERROR(BIF_P, BADARG); + while (ix >= atom_table_size()) { + char tmp[20]; + erts_snprintf(tmp, sizeof(tmp), "am%x", atom_table_size()); + erts_atom_put((byte *) tmp, strlen(tmp), ERTS_ATOM_ENC_LATIN1, 1); + } + return make_atom(ix); + } + break; } default: diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c index bd6da0a6f1..bcbda65da0 100644 --- a/erts/emulator/beam/erl_map.c +++ b/erts/emulator/beam/erl_map.c @@ -2549,19 +2549,24 @@ static int int_sqrt_ceiling(Uint x) } } -Uint hashmap_over_estimated_heap_size(Uint n) +Uint hashmap_over_estimated_heap_size(Uint k) { - /* n is nr of key-value pairs. - Average nr of nodes is about n/3. - Standard deviation is about sqrt(n)/3. - Assuming normal probability distribution, - we overestimate nr of nodes by 14 std.devs, which gives a probability - for overrun of 1.0e-49 (same magnitude as a git SHA1 collision). + /* k is nr of key-value pairs. + N(k) is expected nr of nodes in hamt. + + Observation: + For uniformly distributed hash values, average of N varies between + 0.3*k and 0.4*k (with a beautiful sine curve) + and standard deviation of N is about sqrt(k)/3. + + Assuming normal probability distribution, we overestimate nr of nodes + by 15 std.devs above the average, which gives a probability for overrun + less than 1.0e-49 (same magnitude as a git SHA1 collision). */ - Uint nodes = (n + int_sqrt_ceiling(n)*14) / 3; - return (n*2 + /* leaf cons cells */ - n + /* leaf list terms */ - nodes*2); /* headers + parent refs */ + Uint max_nodes = 2*k/5 + (15/3)*int_sqrt_ceiling(k); + return (k*2 + /* leaf cons cells */ + k + /* leaf list terms */ + max_nodes*2); /* headers + parent boxed terms */ } diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl index 91a5706320..1da08beb8b 100644 --- a/erts/emulator/test/map_SUITE.erl +++ b/erts/emulator/test/map_SUITE.erl @@ -62,6 +62,7 @@ t_maps_without/1, %% misc + t_hashmap_balance/1, t_pdict/1, t_ets/1, t_dets/1, @@ -111,6 +112,7 @@ all() -> [ %% Other functions + t_hashmap_balance, t_pdict, t_ets, t_tracing @@ -1421,6 +1423,75 @@ t_maps_without(_Config) -> %% MISC + +%% Verify that the the number of nodes in hashmaps +%% of different types and sizes does not deviate too +%% much from the theoretical model. +t_hashmap_balance(_Config) -> + io:format("Integer keys\n", []), + hashmap_balance(fun(I) -> I end), + io:format("Float keys\n", []), + hashmap_balance(fun(I) -> float(I) end), + io:format("String keys\n", []), + hashmap_balance(fun(I) -> integer_to_list(I) end), + io:format("Binary (big) keys\n", []), + hashmap_balance(fun(I) -> <<I:16/big>> end), + io:format("Binary (little) keys\n", []), + hashmap_balance(fun(I) -> <<I:16/little>> end), + io:format("Atom keys\n", []), + erts_debug:set_internal_state(available_internal_state, true), + hashmap_balance(fun(I) -> erts_debug:get_internal_state({atom,I}) end), + erts_debug:set_internal_state(available_internal_state, false), + + ok. + +hashmap_balance(KeyFun) -> + F = fun(I, {M0,Max0}) -> + Key = KeyFun(I), + M1 = M0#{Key => Key}, + Max1 = case erts_internal:map_type(M1) of + hashmap -> + Nodes = hashmap_nodes(M1), + Avg = maps:size(M1) * 0.4, + StdDev = math:sqrt(maps:size(M1)) / 3, + SD_diff = abs(Nodes - Avg) / StdDev, + %%io:format("~p keys: ~p nodes avg=~p SD_diff=~p\n", + %% [maps:size(M1), Nodes, Avg, SD_diff]), + {MaxDiff0, _} = Max0, + case {Nodes > Avg, SD_diff > MaxDiff0} of + {true, true} -> {SD_diff, M1}; + _ -> Max0 + end; + + flatmap -> Max0 + end, + {M1, Max1} + end, + + {_,{MaxDiff,MaxMap}} = lists:foldl(F, + {#{}, {0, 0}}, + lists:seq(1,10000)), + io:format("Max std dev diff ~p for map of size ~p (nodes=~p, flatsize=~p)\n", + [MaxDiff, maps:size(MaxMap), hashmap_nodes(MaxMap), erts_debug:flat_size(MaxMap)]), + + true = (MaxDiff < 6), % The probability of this line failing is about 0.000000001 + % for a uniform hash. I've set the probability this "high" for now + % to detect flaws in our make_internal_hash. + % Hard limit is 15 (see hashmap_over_estimated_heap_size). + ok. + +hashmap_nodes(M) -> + Info = erts_debug:map_info(M), + lists:foldl(fun(Tpl,Acc) -> + case element(1,Tpl) of + bitmaps -> Acc + element(2,Tpl); + arrays -> Acc + element(2,Tpl); + _ -> Acc + end + end, + 0, + Info). + t_pdict(_Config) -> put(#{ a => b, b => a},#{ c => d}), diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c index 7b49f23d0a..5a3be84825 100644 --- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c +++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c @@ -1717,8 +1717,9 @@ static ERL_NIF_TERM sorted_list_from_maps_nif(ErlNifEnv* env, int argc, const ER return enif_make_int(env, __LINE__); cnt = 0; + next_ret = 1; while(enif_map_iterator_get_pair(env,&iter_f,&key,&value)) { - if (cnt && !next_ret) + if (!next_ret) return enif_make_int(env, __LINE__); list_f = enif_make_list_cell(env, enif_make_tuple2(env, key, value), list_f); next_ret = enif_map_iterator_next(env,&iter_f); @@ -1731,8 +1732,9 @@ static ERL_NIF_TERM sorted_list_from_maps_nif(ErlNifEnv* env, int argc, const ER return enif_make_int(env, __LINE__); cnt = 0; + prev_ret = 1; while(enif_map_iterator_get_pair(env,&iter_b,&key,&value)) { - if (cnt && !prev_ret) + if (!prev_ret) return enif_make_int(env, __LINE__); /* Test that iter_f can step "backwards" */ |