From c94506ccc003016db49fa2dbe5741196e071d45a Mon Sep 17 00:00:00 2001
From: Guilherme Andrade <g@gandrade.net>
Date: Sat, 22 Apr 2017 18:40:26 +0100
Subject: erts: Add test cases for salted enif_hash calls

---
 erts/emulator/test/nif_SUITE.erl | 148 ++++++++++++++++++++++++++++-----------
 1 file changed, 107 insertions(+), 41 deletions(-)

(limited to 'erts/emulator/test')

diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl
index 36af68be60..6e4866bfd8 100644
--- a/erts/emulator/test/nif_SUITE.erl
+++ b/erts/emulator/test/nif_SUITE.erl
@@ -58,7 +58,9 @@
          nif_port_command/1,
          nif_snprintf/1,
          nif_internal_hash/1,
-         nif_phash2/1
+         nif_internal_hash_salted/1,
+         nif_phash2/1,
+         nif_phash2_salted/1
 	]).
 
 -export([many_args_100/100]).
@@ -94,7 +96,9 @@ all() ->
      nif_port_command,
      nif_snprintf,
      nif_internal_hash,
-     nif_phash2].
+     nif_internal_hash_salted,
+     nif_phash2,
+     nif_phash2_salted].
 
 groups() ->
     [{G, [], api_repeaters()} || G <- api_groups()]
@@ -2616,57 +2620,119 @@ nif_snprintf(Config) ->
 
 nif_internal_hash(Config) ->
     ensure_lib_loaded(Config),
-    Terms =
-        [random_term() || _ <- lists:seq(1, 5000)],
+    HashValueBitSize = nif_hash_result_bitsize(internal),
+    Terms = unique([random_term() || _ <- lists:seq(1, 5000)]),
+    HashValues = [hash_nif(internal, Term, 0) || Term <- Terms],
+    test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05).
 
-    % Unlike the phash2 hash, in this case we
-    % have nothing to compare to, so let's try
-    % and at least make sure the distribution
-    % isn't outright wrong, statistical nuances
-    % aside.
+nif_internal_hash_salted(Config) ->
+    ensure_lib_loaded(Config),
+    test_salted_nif_hash(internal).
+
+nif_phash2(Config) ->
+    ensure_lib_loaded(Config),
+    HashValueBitSize = nif_hash_result_bitsize(phash2),
+    Terms = unique([random_term() || _ <- lists:seq(1, 5000)]),
+    HashValues =
+        lists:map(
+          fun (Term) ->
+                  HashValue = erlang:phash2(Term),
+                  NifHashValue = hash_nif(phash2, Term, 0),
+                  (HashValue =:= NifHashValue
+                   orelse ct:fail("Expected: ~p\nActual:   ~p",
+                                  [HashValue, NifHashValue])),
+                  HashValue
+          end,
+          Terms),
+    test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05).
+
+nif_phash2_salted(Config) ->
+    ensure_lib_loaded(Config),
+    test_salted_nif_hash(phash2).
+
+test_salted_nif_hash(HashType) ->
+    HashValueBitSize = nif_hash_result_bitsize(HashType),
+    Terms = unique([random_term() || _ <- lists:seq(1, 5000)]),
+    Salts = unique([random_uint32() || _ <- lists:seq(1, 100)]),
+    {HashValuesPerSalt, HashValuesPerTerm} =
+        lists:mapfoldl(
+          fun (Salt, Acc) ->
+                  {HashValues, NewAcc} =
+                    lists:mapfoldl(
+                      fun (Term, AccB) ->
+                              HashValue = hash_nif(HashType, Term, Salt),
+                              NewAccB = dict:append(Term, HashValue, AccB),
+                              {HashValue, NewAccB}
+                      end,
+                      Acc,
+                      Terms),
+                  {{Salt, HashValues}, NewAcc}
+          end,
+          dict:new(),
+          Salts),
+
+    % Test per-salt hash distribution of different terms
+    lists:foreach(
+      fun ({_Salt, HashValues}) ->
+              test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05)
+      end,
+      HashValuesPerSalt),
 
+    % Test per-term hash distribution of different salts
+    dict:fold(
+      fun (_Term, HashValues, Acc) ->
+              % Be more tolerant of relative deviation,
+              % as there's fewer hash values here.
+              test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.30),
+              Acc
+      end,
+      ok,
+      HashValuesPerTerm).
+
+test_bit_distribution_fitness(Integers, BitSize, MaxRelativeDeviation) ->
+    MaxInteger = (1 bsl BitSize) - 1,
     OnesPerBit =
         lists:foldl(
-          fun (Term, Acc) ->
-                  NifHashValue = hash_nif(internal, Term, 0),
+          fun (Integer, Acc) when Integer >= 0, Integer =< MaxInteger ->
                   lists:foldl(
                     fun (BitIndex, AccB) ->
-                            BitValue = (NifHashValue band (1 bsl BitIndex)) bsr BitIndex,
-                            dict:update_counter(BitIndex, BitValue, AccB)
+                            BitValue = (Integer band (1 bsl BitIndex)) bsr BitIndex,
+                            orddict:update_counter(BitIndex, BitValue, AccB)
                     end,
                     Acc,
-                    lists:seq(0, 31))
+                    lists:seq(0, BitSize - 1))
           end,
-          dict:new(),
-          Terms),
+          orddict:new(),
+          Integers),
+
+    ExpectedNrOfOnes = length(Integers) div 2,
+    FailureText =
+        orddict:fold(
+          fun (BitIndex, NrOfOnes, Acc) ->
+                  RelativeDeviation = abs(NrOfOnes - ExpectedNrOfOnes) / length(Integers),
+                  case RelativeDeviation >= MaxRelativeDeviation of
+                      false -> Acc;
+                      true ->
+                          [Acc,
+                           io_lib:format(
+                             "Unreasonable deviation on number of set bits (i=~p): "
+                             "expected ~p, got ~p (relative dev. ~.3f)~n",
+                             [BitIndex, ExpectedNrOfOnes, NrOfOnes, RelativeDeviation])]
+                  end
+          end,
+          [],
+          OnesPerBit),
 
-    ExpectedNrOfOnes = length(Terms) div 2,
-    dict:fold(
-      fun (BitIndex, NrOfOnes, Acc) ->
-              RelativeDeviation = abs(NrOfOnes - ExpectedNrOfOnes) / ExpectedNrOfOnes,
-              (RelativeDeviation < 0.10
-               orelse ct:fail("Unreasonable deviation on number of set bits (i=~p): "
-                              "expected ~p, got ~p (relative dev. ~.3f)",
-                              [BitIndex, ExpectedNrOfOnes, NrOfOnes, RelativeDeviation])),
-              Acc
-      end,
-      ok,
-      OnesPerBit).
+    (FailureText =:= [] orelse ct:fail(FailureText)).
 
-nif_phash2(Config) ->
-    ensure_lib_loaded(Config),
-    Terms =
-        [random_term() || _ <- lists:seq(1, 5000)],
+nif_hash_result_bitsize(internal) -> 32;
+nif_hash_result_bitsize(phash2) -> 27.
 
-    lists:foreach(
-      fun (Term) ->
-              HashValue = erlang:phash2(Term),
-              NifHashValue = hash_nif(phash2, Term, 0),
-              (HashValue =:= NifHashValue
-               orelse ct:fail("Expected: ~p\nActual:   ~p",
-                              [HashValue, NifHashValue]))
-      end,
-      Terms).
+unique(List) ->
+    lists:usort(List).
+
+random_uint32() ->
+    rand:uniform(1 bsl 32) - 1.
 
 random_term() ->
     case rand:uniform(6) of
-- 
cgit v1.2.3