From c94506ccc003016db49fa2dbe5741196e071d45a Mon Sep 17 00:00:00 2001 From: Guilherme Andrade Date: Sat, 22 Apr 2017 18:40:26 +0100 Subject: erts: Add test cases for salted enif_hash calls --- erts/emulator/test/nif_SUITE.erl | 148 ++++++++++++++++++++++++++++----------- 1 file changed, 107 insertions(+), 41 deletions(-) (limited to 'erts/emulator/test') diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl index 36af68be60..6e4866bfd8 100644 --- a/erts/emulator/test/nif_SUITE.erl +++ b/erts/emulator/test/nif_SUITE.erl @@ -58,7 +58,9 @@ nif_port_command/1, nif_snprintf/1, nif_internal_hash/1, - nif_phash2/1 + nif_internal_hash_salted/1, + nif_phash2/1, + nif_phash2_salted/1 ]). -export([many_args_100/100]). @@ -94,7 +96,9 @@ all() -> nif_port_command, nif_snprintf, nif_internal_hash, - nif_phash2]. + nif_internal_hash_salted, + nif_phash2, + nif_phash2_salted]. groups() -> [{G, [], api_repeaters()} || G <- api_groups()] @@ -2616,57 +2620,119 @@ nif_snprintf(Config) -> nif_internal_hash(Config) -> ensure_lib_loaded(Config), - Terms = - [random_term() || _ <- lists:seq(1, 5000)], + HashValueBitSize = nif_hash_result_bitsize(internal), + Terms = unique([random_term() || _ <- lists:seq(1, 5000)]), + HashValues = [hash_nif(internal, Term, 0) || Term <- Terms], + test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05). - % Unlike the phash2 hash, in this case we - % have nothing to compare to, so let's try - % and at least make sure the distribution - % isn't outright wrong, statistical nuances - % aside. +nif_internal_hash_salted(Config) -> + ensure_lib_loaded(Config), + test_salted_nif_hash(internal). + +nif_phash2(Config) -> + ensure_lib_loaded(Config), + HashValueBitSize = nif_hash_result_bitsize(phash2), + Terms = unique([random_term() || _ <- lists:seq(1, 5000)]), + HashValues = + lists:map( + fun (Term) -> + HashValue = erlang:phash2(Term), + NifHashValue = hash_nif(phash2, Term, 0), + (HashValue =:= NifHashValue + orelse ct:fail("Expected: ~p\nActual: ~p", + [HashValue, NifHashValue])), + HashValue + end, + Terms), + test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05). + +nif_phash2_salted(Config) -> + ensure_lib_loaded(Config), + test_salted_nif_hash(phash2). + +test_salted_nif_hash(HashType) -> + HashValueBitSize = nif_hash_result_bitsize(HashType), + Terms = unique([random_term() || _ <- lists:seq(1, 5000)]), + Salts = unique([random_uint32() || _ <- lists:seq(1, 100)]), + {HashValuesPerSalt, HashValuesPerTerm} = + lists:mapfoldl( + fun (Salt, Acc) -> + {HashValues, NewAcc} = + lists:mapfoldl( + fun (Term, AccB) -> + HashValue = hash_nif(HashType, Term, Salt), + NewAccB = dict:append(Term, HashValue, AccB), + {HashValue, NewAccB} + end, + Acc, + Terms), + {{Salt, HashValues}, NewAcc} + end, + dict:new(), + Salts), + + % Test per-salt hash distribution of different terms + lists:foreach( + fun ({_Salt, HashValues}) -> + test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05) + end, + HashValuesPerSalt), + % Test per-term hash distribution of different salts + dict:fold( + fun (_Term, HashValues, Acc) -> + % Be more tolerant of relative deviation, + % as there's fewer hash values here. + test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.30), + Acc + end, + ok, + HashValuesPerTerm). + +test_bit_distribution_fitness(Integers, BitSize, MaxRelativeDeviation) -> + MaxInteger = (1 bsl BitSize) - 1, OnesPerBit = lists:foldl( - fun (Term, Acc) -> - NifHashValue = hash_nif(internal, Term, 0), + fun (Integer, Acc) when Integer >= 0, Integer =< MaxInteger -> lists:foldl( fun (BitIndex, AccB) -> - BitValue = (NifHashValue band (1 bsl BitIndex)) bsr BitIndex, - dict:update_counter(BitIndex, BitValue, AccB) + BitValue = (Integer band (1 bsl BitIndex)) bsr BitIndex, + orddict:update_counter(BitIndex, BitValue, AccB) end, Acc, - lists:seq(0, 31)) + lists:seq(0, BitSize - 1)) end, - dict:new(), - Terms), + orddict:new(), + Integers), + + ExpectedNrOfOnes = length(Integers) div 2, + FailureText = + orddict:fold( + fun (BitIndex, NrOfOnes, Acc) -> + RelativeDeviation = abs(NrOfOnes - ExpectedNrOfOnes) / length(Integers), + case RelativeDeviation >= MaxRelativeDeviation of + false -> Acc; + true -> + [Acc, + io_lib:format( + "Unreasonable deviation on number of set bits (i=~p): " + "expected ~p, got ~p (relative dev. ~.3f)~n", + [BitIndex, ExpectedNrOfOnes, NrOfOnes, RelativeDeviation])] + end + end, + [], + OnesPerBit), - ExpectedNrOfOnes = length(Terms) div 2, - dict:fold( - fun (BitIndex, NrOfOnes, Acc) -> - RelativeDeviation = abs(NrOfOnes - ExpectedNrOfOnes) / ExpectedNrOfOnes, - (RelativeDeviation < 0.10 - orelse ct:fail("Unreasonable deviation on number of set bits (i=~p): " - "expected ~p, got ~p (relative dev. ~.3f)", - [BitIndex, ExpectedNrOfOnes, NrOfOnes, RelativeDeviation])), - Acc - end, - ok, - OnesPerBit). + (FailureText =:= [] orelse ct:fail(FailureText)). -nif_phash2(Config) -> - ensure_lib_loaded(Config), - Terms = - [random_term() || _ <- lists:seq(1, 5000)], +nif_hash_result_bitsize(internal) -> 32; +nif_hash_result_bitsize(phash2) -> 27. - lists:foreach( - fun (Term) -> - HashValue = erlang:phash2(Term), - NifHashValue = hash_nif(phash2, Term, 0), - (HashValue =:= NifHashValue - orelse ct:fail("Expected: ~p\nActual: ~p", - [HashValue, NifHashValue])) - end, - Terms). +unique(List) -> + lists:usort(List). + +random_uint32() -> + rand:uniform(1 bsl 32) - 1. random_term() -> case rand:uniform(6) of -- cgit v1.2.3