aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--erts/emulator/beam/erl_bif_info.c20
-rw-r--r--erts/emulator/beam/erl_map.c27
-rw-r--r--erts/emulator/test/map_SUITE.erl71
-rw-r--r--erts/emulator/test/nif_SUITE_data/nif_SUITE.c6
4 files changed, 111 insertions, 13 deletions
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index d750e34be3..0774ab51af 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -3597,6 +3597,26 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1)
BIF_RET(erts_debug_reader_groups_map(BIF_P, (int) groups));
}
+ else if (ERTS_IS_ATOM_STR("internal_hash", tp[1])) {
+ Uint hash = (Uint) make_internal_hash(tp[2]);
+ Uint hsz = 0;
+ Eterm* hp;
+ erts_bld_uint(NULL, &hsz, hash);
+ hp = HAlloc(BIF_P,hsz);
+ return erts_bld_uint(&hp, NULL, hash);
+ }
+ else if (ERTS_IS_ATOM_STR("atom", tp[1])) {
+ Uint ix;
+ if (!term_to_Uint(tp[2], &ix))
+ BIF_ERROR(BIF_P, BADARG);
+ while (ix >= atom_table_size()) {
+ char tmp[20];
+ erts_snprintf(tmp, sizeof(tmp), "am%x", atom_table_size());
+ erts_atom_put((byte *) tmp, strlen(tmp), ERTS_ATOM_ENC_LATIN1, 1);
+ }
+ return make_atom(ix);
+ }
+
break;
}
default:
diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c
index bd6da0a6f1..bcbda65da0 100644
--- a/erts/emulator/beam/erl_map.c
+++ b/erts/emulator/beam/erl_map.c
@@ -2549,19 +2549,24 @@ static int int_sqrt_ceiling(Uint x)
}
}
-Uint hashmap_over_estimated_heap_size(Uint n)
+Uint hashmap_over_estimated_heap_size(Uint k)
{
- /* n is nr of key-value pairs.
- Average nr of nodes is about n/3.
- Standard deviation is about sqrt(n)/3.
- Assuming normal probability distribution,
- we overestimate nr of nodes by 14 std.devs, which gives a probability
- for overrun of 1.0e-49 (same magnitude as a git SHA1 collision).
+ /* k is nr of key-value pairs.
+ N(k) is expected nr of nodes in hamt.
+
+ Observation:
+ For uniformly distributed hash values, average of N varies between
+ 0.3*k and 0.4*k (with a beautiful sine curve)
+ and standard deviation of N is about sqrt(k)/3.
+
+ Assuming normal probability distribution, we overestimate nr of nodes
+ by 15 std.devs above the average, which gives a probability for overrun
+ less than 1.0e-49 (same magnitude as a git SHA1 collision).
*/
- Uint nodes = (n + int_sqrt_ceiling(n)*14) / 3;
- return (n*2 + /* leaf cons cells */
- n + /* leaf list terms */
- nodes*2); /* headers + parent refs */
+ Uint max_nodes = 2*k/5 + (15/3)*int_sqrt_ceiling(k);
+ return (k*2 + /* leaf cons cells */
+ k + /* leaf list terms */
+ max_nodes*2); /* headers + parent boxed terms */
}
diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl
index 91a5706320..1da08beb8b 100644
--- a/erts/emulator/test/map_SUITE.erl
+++ b/erts/emulator/test/map_SUITE.erl
@@ -62,6 +62,7 @@
t_maps_without/1,
%% misc
+ t_hashmap_balance/1,
t_pdict/1,
t_ets/1,
t_dets/1,
@@ -111,6 +112,7 @@ all() -> [
%% Other functions
+ t_hashmap_balance,
t_pdict,
t_ets,
t_tracing
@@ -1421,6 +1423,75 @@ t_maps_without(_Config) ->
%% MISC
+
+%% Verify that the the number of nodes in hashmaps
+%% of different types and sizes does not deviate too
+%% much from the theoretical model.
+t_hashmap_balance(_Config) ->
+ io:format("Integer keys\n", []),
+ hashmap_balance(fun(I) -> I end),
+ io:format("Float keys\n", []),
+ hashmap_balance(fun(I) -> float(I) end),
+ io:format("String keys\n", []),
+ hashmap_balance(fun(I) -> integer_to_list(I) end),
+ io:format("Binary (big) keys\n", []),
+ hashmap_balance(fun(I) -> <<I:16/big>> end),
+ io:format("Binary (little) keys\n", []),
+ hashmap_balance(fun(I) -> <<I:16/little>> end),
+ io:format("Atom keys\n", []),
+ erts_debug:set_internal_state(available_internal_state, true),
+ hashmap_balance(fun(I) -> erts_debug:get_internal_state({atom,I}) end),
+ erts_debug:set_internal_state(available_internal_state, false),
+
+ ok.
+
+hashmap_balance(KeyFun) ->
+ F = fun(I, {M0,Max0}) ->
+ Key = KeyFun(I),
+ M1 = M0#{Key => Key},
+ Max1 = case erts_internal:map_type(M1) of
+ hashmap ->
+ Nodes = hashmap_nodes(M1),
+ Avg = maps:size(M1) * 0.4,
+ StdDev = math:sqrt(maps:size(M1)) / 3,
+ SD_diff = abs(Nodes - Avg) / StdDev,
+ %%io:format("~p keys: ~p nodes avg=~p SD_diff=~p\n",
+ %% [maps:size(M1), Nodes, Avg, SD_diff]),
+ {MaxDiff0, _} = Max0,
+ case {Nodes > Avg, SD_diff > MaxDiff0} of
+ {true, true} -> {SD_diff, M1};
+ _ -> Max0
+ end;
+
+ flatmap -> Max0
+ end,
+ {M1, Max1}
+ end,
+
+ {_,{MaxDiff,MaxMap}} = lists:foldl(F,
+ {#{}, {0, 0}},
+ lists:seq(1,10000)),
+ io:format("Max std dev diff ~p for map of size ~p (nodes=~p, flatsize=~p)\n",
+ [MaxDiff, maps:size(MaxMap), hashmap_nodes(MaxMap), erts_debug:flat_size(MaxMap)]),
+
+ true = (MaxDiff < 6), % The probability of this line failing is about 0.000000001
+ % for a uniform hash. I've set the probability this "high" for now
+ % to detect flaws in our make_internal_hash.
+ % Hard limit is 15 (see hashmap_over_estimated_heap_size).
+ ok.
+
+hashmap_nodes(M) ->
+ Info = erts_debug:map_info(M),
+ lists:foldl(fun(Tpl,Acc) ->
+ case element(1,Tpl) of
+ bitmaps -> Acc + element(2,Tpl);
+ arrays -> Acc + element(2,Tpl);
+ _ -> Acc
+ end
+ end,
+ 0,
+ Info).
+
t_pdict(_Config) ->
put(#{ a => b, b => a},#{ c => d}),
diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
index 7b49f23d0a..5a3be84825 100644
--- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
+++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
@@ -1717,8 +1717,9 @@ static ERL_NIF_TERM sorted_list_from_maps_nif(ErlNifEnv* env, int argc, const ER
return enif_make_int(env, __LINE__);
cnt = 0;
+ next_ret = 1;
while(enif_map_iterator_get_pair(env,&iter_f,&key,&value)) {
- if (cnt && !next_ret)
+ if (!next_ret)
return enif_make_int(env, __LINE__);
list_f = enif_make_list_cell(env, enif_make_tuple2(env, key, value), list_f);
next_ret = enif_map_iterator_next(env,&iter_f);
@@ -1731,8 +1732,9 @@ static ERL_NIF_TERM sorted_list_from_maps_nif(ErlNifEnv* env, int argc, const ER
return enif_make_int(env, __LINE__);
cnt = 0;
+ prev_ret = 1;
while(enif_map_iterator_get_pair(env,&iter_b,&key,&value)) {
- if (cnt && !prev_ret)
+ if (!prev_ret)
return enif_make_int(env, __LINE__);
/* Test that iter_f can step "backwards" */