aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/test/hash_SUITE.erl
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
committerErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
commit84adefa331c4159d432d22840663c38f155cd4c1 (patch)
treebff9a9c66adda4df2106dfd0e5c053ab182a12bd /erts/emulator/test/hash_SUITE.erl
downloadotp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release.OTP_R13B03
Diffstat (limited to 'erts/emulator/test/hash_SUITE.erl')
-rw-r--r--erts/emulator/test/hash_SUITE.erl717
1 files changed, 717 insertions, 0 deletions
diff --git a/erts/emulator/test/hash_SUITE.erl b/erts/emulator/test/hash_SUITE.erl
new file mode 100644
index 0000000000..85bdb8bff8
--- /dev/null
+++ b/erts/emulator/test/hash_SUITE.erl
@@ -0,0 +1,717 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2000-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Verifying erlang:phash/2. And now also phash2/2, to some extent.
+%% Test the hashing algorithm for integer numbers in 2 ways:
+%% 1 Test that numbers in diferent sequences get sufficiently spread
+%% in a "bit pattern" way (modulo 256 etc).
+%% 2 Test that numbers are correctly hashed compared to a reference implementation,
+%% regardless of their internal representation. The hashing algorithm should never
+%% change.
+%% The hashing of other datatypes is tested with a few samples, so that we are sure
+%% it does not change across versions.
+%% Also tests that the limit can be between 0 and 16#FFFFFFFF.
+%%
+-module(hash_SUITE).
+-export([basic_test/0,cmp_test/1,range_test/0,spread_test/1,
+ phash2_test/0, otp_5292_test/0, bit_level_binaries/0,
+ otp_7127_test/0]).
+-compile({nowarn_deprecated_function, {erlang,hash,2}}).
+
+%%
+%% Define to run outside of test server
+%%
+%-define(STANDALONE,1).
+
+%%
+%% Define for debug output
+%%
+%-define(debug,1).
+
+-ifdef(STANDALONE).
+-define(config(A,B),config(A,B)).
+-export([config/2]).
+-else.
+-include("test_server.hrl").
+-endif.
+
+-ifdef(debug).
+-ifdef(STANDALONE).
+-define(line, erlang:display({?MODULE,?LINE}), ).
+-endif.
+-define(dbgformat(A,B),io:format(A,B)).
+-else.
+-ifdef(STANDALONE).
+-define(line, noop, ).
+-endif.
+-define(dbgformat(A,B),noop).
+-endif.
+
+-ifdef(STANDALONE).
+config(priv_dir,_) ->
+ ".".
+-else.
+%% When run in test server.
+-export([all/1,test_basic/1,test_cmp/1,test_range/1,test_spread/1,
+ test_phash2/1,otp_5292/1,bit_level_binaries/1,otp_7127/1,
+ fin_per_testcase/2,init_per_testcase/2]).
+init_per_testcase(_Case, Config) ->
+ ?line Dog=test_server:timetrap(test_server:minutes(10)),
+ [{watchdog, Dog}|Config].
+
+fin_per_testcase(_Case, Config) ->
+ Dog=?config(watchdog, Config),
+ test_server:timetrap_cancel(Dog),
+ ok.
+all(doc) ->
+ ["Test erlang:phash"];
+all(suite) ->
+ [test_basic, test_cmp, test_range, test_spread, test_phash2, otp_5292,
+ bit_level_binaries, otp_7127].
+
+test_basic(suite) ->
+ [];
+test_basic(doc) ->
+ ["Tests basic functionality of erlang:phash and that the "
+ "hashes has not changed (neither hash nor phash)"];
+test_basic(Config) when is_list(Config) ->
+ basic_test().
+
+
+test_cmp(suite) ->
+ [];
+test_cmp(doc) ->
+ ["Compares integer hashes made by erlang:phash with those of a reference "
+ "implementation"];
+test_cmp(Config) when is_list(Config) ->
+ cmp_test(10000).
+
+test_range(suite) ->
+ [];
+test_range(doc) ->
+ ["Tests ranges on erlang:phash from 1 to 2^32"];
+test_range(Config) when is_list(Config) ->
+ range_test().
+
+test_spread(suite) ->
+ [];
+test_spread(doc) ->
+ ["Tests that the hashes are spread ok"];
+test_spread(Config) when is_list(Config) ->
+ spread_test(10).
+
+test_phash2(suite) ->
+ [];
+test_phash2(doc) ->
+ ["Tests phash2"];
+test_phash2(Config) when is_list(Config) ->
+ phash2_test().
+
+otp_5292(suite) ->
+ [];
+otp_5292(doc) ->
+ ["Tests hash, phash and phash2 regarding integers."];
+otp_5292(Config) when is_list(Config) ->
+ otp_5292_test().
+
+%% Test hashing bit-level binaries.
+bit_level_binaries(Config) when is_list(Config) ->
+ bit_level_binaries().
+
+otp_7127(suite) ->
+ [];
+otp_7127(doc) ->
+ ["Tests phash2/1."];
+otp_7127(Config) when is_list(Config) ->
+ otp_7127_test().
+
+-endif.
+
+
+
+%%
+%% Here are the real tests, they can be run without test_server,
+%% define -DSTANDALONE when compiling.
+%%
+basic_test() ->
+ ?line 685556714 = erlang:phash({a,b,c},16#FFFFFFFF),
+ ?line 14468079 = erlang:hash({a,b,c},16#7FFFFFF),
+ ?line 37442646 = erlang:phash([a,b,c,{1,2,3},c:pid(0,2,3),
+ 16#77777777777777],16#FFFFFFFF),
+ ?line Comment = case erlang:hash([a,b,c,{1,2,3},c:pid(0,2,3),
+ 16#77777777777777],16#7FFFFFF) of
+ 102727602 ->
+ ?line big = erlang:system_info(endian),
+ "Big endian machine";
+ 105818829 ->
+ ?line little = erlang:system_info(endian),
+ "Little endian machine"
+ end,
+ ExternalReference = <<131,114,0,3,100,0,13,110,111,110,111,100,101,64,
+ 110,111,104,111,115,116,0,0,0,0,122,0,0,0,0,0,0,0,0>>,
+ ?line 1113403635 = erlang:phash(binary_to_term(ExternalReference),
+ 16#FFFFFFFF),
+ ?line 123 = erlang:hash(binary_to_term(ExternalReference),
+ 16#7FFFFFF),
+ ExternalFun = <<131,117,0,0,0,3,103,100,0,13,110,111,110,111,100,101,64,
+ 110,111,104,111,115,116,0,0,0,38,0,0,0,0,0,100,0,8,101,
+ 114,108,95,101,118,97,108,97,20,98,5,182,139,98,108,0,0,
+ 0,3,104,2,100,0,1,66,109,0,0,0,33,131,114,0,3,100,0,13,
+ 110,111,110,111,100,101,64,110,111,104,111,115,116,0,0,
+ 0,0,122,0,0,0,0,0,0,0,0,104,2,100,0,1,76,107,0,33,131,
+ 114,0,3,100,0,13,110,111,110,111,100,101,64,110,111,104,
+ 111,115,116,0,0,0,0,122,0,0,0,0,0,0,0,0,104,2,100,0,1,82,
+ 114,0,3,100,0,13,110,111,110,111,100,101,64,110,111,104,
+ 111,115,116,0,0,0,0,122,0,0,0,0,0,0,0,0,106,108,0,0,0,1,
+ 104,5,100,0,6,99,108,97,117,115,101,97,1,106,106,108,0,0,
+ 0,1,104,3,100,0,7,105,110,116,101,103,101,114,97,1,97,1,
+ 106,106,104,3,100,0,4,101,118,97,108,104,2,100,0,5,115,
+ 104,101,108,108,100,0,10,108,111,99,97,108,95,102,117,
+ 110,99,108,0,0,0,1,103,100,0,13,110,111,110,111,100,101,
+ 64,110,111,104,111,115,116,0,0,0,22,0,0,0,0,0,106>>,
+ ?line 170987488 = erlang:phash(binary_to_term(ExternalFun),
+ 16#FFFFFFFF),
+ ?line 124460689 = erlang:hash(binary_to_term(ExternalFun),
+ 16#7FFFFFF),
+ case (catch erlang:phash(1,0)) of
+ {'EXIT',{badarg, _}} ->
+ {comment, Comment};
+ _ ->
+ exit(phash_accepted_zero_as_range)
+ end.
+
+
+range_test() ->
+ random:seed(),
+ F = fun(From,From,_FF) ->
+ ok;
+ (From,To,FF) ->
+ R = random:uniform(16#FFFFFFFFFFFFFFFF),
+ X = erlang:phash(R, From),
+ Y = erlang:phash(R, 16#100000000) - 1,
+ Z = (Y rem From) + 1,
+ case X =:= Z of
+ true ->
+ FF(From*2,To,FF);
+ _ ->
+ exit({range_test_failed, hash_on, R, range, From})
+ end
+ end,
+ F(1,16#100000000,F).
+
+
+
+spread_test(N) ->
+ ?line test_fun(N,{erlang,phash},16#50000000000,fun(X) ->
+ X
+ end),
+ ?line test_fun(N,{erlang,phash},0,fun(X) ->
+ X
+ end),
+ ?line test_fun(N,{erlang,phash},16#123456789ABCDEF123456789ABCDEF,fun(X) ->
+ X
+ end),
+ ?line test_fun(N,{erlang,phash},16#50000000000,fun(X) ->
+ integer_to_list(X)
+ end),
+ ?line test_fun(N,{erlang,phash},16#50000000000,fun(X) ->
+ integer_to_bytelist(X,[])
+ end),
+ ?line test_fun(N,{erlang,phash},16#50000000000,fun(X) ->
+ integer_to_binary(X)
+ end).
+
+
+
+cmp_test(N) ->
+ % No need to save seed, the error indicates what number caused it.
+ random:seed(),
+ do_cmp_hashes(N,8).
+do_cmp_hashes(0,_) ->
+ ok;
+do_cmp_hashes(N,Steps) ->
+ ?line R0 = random:uniform(1 bsl Steps - 1) + random:uniform(16#FFFFFFFF),
+ ?line R = case random:uniform(2) of
+ 1 ->
+ R0;
+ _ ->
+ -R0
+ end,
+ ?line NSteps = case N rem 10 of
+ 0 ->
+ case (Steps + 8) rem 1024 of
+ 0 ->
+ 8;
+ OK ->
+ OK
+ end;
+ _ ->
+ Steps
+ end,
+ ?line X = erlang:phash(R,16#FFFFFFFF),
+ ?line Y = make_hash(R,16#FFFFFFFF),
+ ?line case X =:= Y of
+ true ->
+ do_cmp_hashes(N - 1, NSteps);
+ _ ->
+ exit({missmatch_on_input, R, phash, X, make_hash, Y})
+ end.
+
+phash2_test() ->
+ Max = 1 bsl 32,
+ BPort = <<131,102,100,0,13,110,111,110,111,100,101,64,110,111,104,
+ 111,115,116,0,0,0,1,0>>,
+ Port = binary_to_term(BPort),
+
+ BXPort = <<131,102,100,0,11,97,112,97,64,108,101,103,111,108,97,115,
+ 0,0,0,24,3>>,
+ XPort = binary_to_term(BXPort),
+
+ BRef = <<131,114,0,3,100,0,13,110,111,110,111,100,101,64,110,111,104,
+ 111,115,116,0,0,0,1,255,0,0,0,0,0,0,0,0>>,
+ Ref = binary_to_term(BRef),
+
+ BXRef = <<131,114,0,3,100,0,11,97,112,97,64,108,101,103,111,108,97,115,
+ 2,0,0,0,155,0,0,0,0,0,0,0,0>>,
+ XRef = binary_to_term(BXRef),
+
+ BXPid = <<131,103,100,0,11,97,112,97,64,108,101,103,111,108,97,115,
+ 0,0,0,36,0,0,0,0,1>>,
+ XPid = binary_to_term(BXPid),
+
+
+ %% X = f1(), Y = f2(), Z = f3(X, Y),
+
+ %% F1 = fun f1/0, % -> abc
+ B1 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98,
+ 13,196,76,242,0,0,0,1,0,0,0,0,100,0,1,116,97,1,98,2,195,126,
+ 58,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
+ 115,116,0,0,0,112,0,0,0,0,0>>,
+ F1 = binary_to_term(B1),
+
+ %% F2 = fun f2/0, % -> abd
+ B2 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98,
+ 13,196,76,242,0,0,0,2,0,0,0,0,100,0,1,116,97,2,98,3,130,152,
+ 185,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
+ 115,116,0,0,0,112,0,0,0,0,0>>,
+ F2 = binary_to_term(B2),
+
+ %% F3 = fun f3/2, % -> {abc, abd}
+ B3 = <<131,112,0,0,0,66,2,215,206,77,69,249,50,170,17,129,47,21,98,
+ 13,196,76,242,0,0,0,3,0,0,0,0,100,0,1,116,97,3,98,7,168,160,
+ 93,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
+ 115,116,0,0,0,112,0,0,0,0,0>>,
+ F3 = binary_to_term(B3),
+
+ %% F4 = fun () -> 123456789012345678901234567 end,
+ B4 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98,
+ 13,196,76,242,0,0,0,4,0,0,0,0,100,0,1,116,97,4,98,2,230,21,
+ 171,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
+ 115,116,0,0,0,112,0,0,0,0,0>>,
+ F4 = binary_to_term(B4),
+
+ %% F5 = fun() -> {X,Y,Z} end,
+ B5 = <<131,112,0,0,0,92,0,215,206,77,69,249,50,170,17,129,47,21,98,
+ 13,196,76,242,0,0,0,5,0,0,0,3,100,0,1,116,97,5,98,0,99,101,
+ 130,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
+ 115,116,0,0,0,112,0,0,0,0,0,100,0,3,97,98,99,100,0,3,97,98,
+ 100,104,2,100,0,3,97,98,99,100,0,3,97,98,100>>,
+ F5 = binary_to_term(B5),
+
+ Chars = lists:seq(32,127),
+ NotAHeapBin = list_to_binary(lists:flatten(lists:duplicate(500,Chars))),
+ <<_:128,SubBin/binary>> = NotAHeapBin,
+ L = [%% nil
+ {[], 3468870702},
+
+ %% atom :( not very good ):
+ %% (cannot use block_hash due to compatibility issues...)
+ {abc,26499},
+ {abd,26500},
+
+ %% small
+ {0,3175731469},
+ {1, 539485162},
+ {-1, 1117813597},
+ {1 bsl 20, 1477815345},
+ {-(1 bsl 20), 3076904293},
+
+ %% bignum
+ {4294967296, 2108323275},
+ {-4294967296, 2586067094},
+ {981494972710656, 1622082818},
+ {-981494972710656, 3367191372},
+ {36893488147419103232, 2545846594},
+ {-36893488147419103232, 1649047068},
+ {1606938044258990275541962092341162602522202993782792835301376,
+ 2573322433},
+ {-1606938044258990275541962092341162602522202993782792835301376,
+ 2288753377},
+
+ %% binary
+ {<<>>, 147926629},
+ {<<0:8>>, 2914887855},
+ {<<0:32>>, 2014511533},
+ {<<"abc">>, 1306188027},
+ {<<"12345678901234567890">>, 3021061640},
+ {NotAHeapBin,2644086993},
+ {SubBin,3575839236},
+
+ %% unaligned sub binaries
+ {unaligned_sub_bin(<<>>), 147926629},
+ {unaligned_sub_bin(<<0:8>>), 2914887855},
+ {unaligned_sub_bin(<<0:32>>), 2014511533},
+ {unaligned_sub_bin(<<"abc">>), 1306188027},
+ {unaligned_sub_bin(<<"12345678901234567890">>), 3021061640},
+ {unaligned_sub_bin(NotAHeapBin),2644086993},
+ {unaligned_sub_bin(SubBin),3575839236},
+
+ %% bit-level binaries
+ {<<0:7>>, 1055790816},
+ {<<"abc",13:4>>, 670412287},
+ {<<5:3,"12345678901234567890">>, 289973273},
+
+ %% fun
+ {F1, 3826013332},
+ {F2, 126009152},
+ {F3, 3482452479},
+ {F4, 633704783},
+ {F5, 1241537408},
+
+ %% module fun
+ {fun lists:map/2, 840287883},
+ {fun lists:map/3, 2318478565},
+ {fun lists:filter/2, 635165125},
+ {fun lists:filter/3, 3824649396},
+ {fun xxx:map/2, 2630071865},
+ {fun xxx:map/3, 4237970519},
+
+ %% pid
+ {c:pid(0,0,0), 2858162279},
+ {c:pid(0,1,0), 2870503209},
+ {c:pid(0,2,0), 1707788908},
+ {XPid, 1290188489},
+
+ %% port
+ {Port,1954394636},
+ {XPort,274735},
+
+ %% ref
+ {Ref, 1675501484},
+ {XRef, 3845846926},
+
+ %% float
+ {0.0, 423528920},
+ {3.14, 3731709215},
+ {-3.14, 1827518724},
+
+ %% list
+ {[0.0], 167906877},
+ {[{}], 4050867804},
+ {[<<>>], 440873397},
+ {[[]], 499070068},
+ {[abc], 3112446404},
+ {[a,b,c], 1505666924},
+ {[a,b|c], 433753489},
+ {"abc", 519996486},
+ {"abc"++[1009], 290369864},
+ {"abc"++[1009]++"de", 4134369195},
+ {"1234567890123456", 963649519},
+
+ %% tuple
+ {{}, 221703996},
+ {{{}}, 2165044361},
+ {{<<>>}, 682464809},
+ {{0.0}, 688441152},
+ {{[]}, 1775079505},
+ {{abc}, 2032039329},
+ {{a,1,{},-3.14}, 1364396939},
+ {{c:pid(0,2,0)}, 686997880},
+ {{F4}, 2279632930},
+ {{a,<<>>}, 2724468891},
+ {{b,<<>>}, 2702508511}
+ ],
+ SpecFun = fun(S) -> sofs:no_elements(S) > 1 end,
+ F = sofs:relation_to_family(sofs:converse(sofs:relation(L))),
+ D = sofs:to_external(sofs:family_specification(SpecFun, F)),
+ ?line [] = D,
+ ?line [] = [{E,H,H2} || {E,H} <- L, (H2 = erlang:phash2(E, Max)) =/= H],
+ ok.
+
+-ifdef(FALSE).
+f1() ->
+ abc.
+
+f2() ->
+ abd.
+
+f3(X, Y) ->
+ {X, Y}.
+-endif.
+
+otp_5292_test() ->
+ H = fun(E) -> [erlang:hash(E, 16#7FFFFFF),
+ erlang:hash(-E, 16#7FFFFFF)]
+ end,
+ S1 = md5([md5(hash_int(S, E, H)) || {Start, N, Sz} <- d(),
+ {S, E} <- int(Start, N, Sz)]),
+ PH = fun(E) -> [erlang:phash(E, 1 bsl 32),
+ erlang:phash(-E, 1 bsl 32),
+ erlang:phash2(E, 1 bsl 32),
+ erlang:phash2(-E, 1 bsl 32)]
+ end,
+ S2 = md5([md5(hash_int(S, E, PH)) || {Start, N, Sz} <- d(),
+ {S, E} <- int(Start, N, Sz)]),
+ ?line Comment = case S1 of
+ <<43,186,76,102,87,4,110,245,203,177,206,6,130,69,43,99>> ->
+ ?line big = erlang:system_info(endian),
+ "Big endian machine";
+ <<21,206,139,15,149,28,167,81,98,225,132,254,49,125,174,195>> ->
+ ?line little = erlang:system_info(endian),
+ "Little endian machine"
+ end,
+ ?line <<140,37,79,80,26,242,130,22,20,229,123,240,223,244,43,99>> = S2,
+ ?line 2 = erlang:hash(1, (1 bsl 27) -1),
+ ?line {'EXIT', _} = (catch erlang:hash(1, (1 bsl 27))),
+ {comment, Comment}.
+
+d() ->
+ [%% Start, NumOfIntervals, SizeOfInterval
+ {(1 bsl I)-100, 2, 100} || I <- lists:seq(1, 1000)].
+
+int(Start, N, Sz) ->
+ {_, R} = lists:mapfoldl(fun(S, Acc) ->
+ {S + Sz, [{S,S+Sz-1} | Acc]}
+ end, [], lists:seq(Start, Start+(N-1)*Sz, Sz)),
+ lists:reverse(R).
+
+hash_int(Start, End, F) ->
+ HL = lists:flatmap(fun(E) -> F(E) end, lists:seq(Start, End)),
+ {Start, End, md5(HL)}.
+
+md5(T) ->
+ erlang:md5(term_to_binary(T)).
+
+bit_level_binaries() ->
+ ?line [3511317,7022633,14044578,28087749,56173436,112344123,90467083|_] =
+ bit_level_all_different(fun erlang:hash/2),
+ ?line [3511317,7022633,14044578,28087749,56173436,112344123,90467083|_] =
+ bit_level_all_different(fun erlang:phash/2),
+ ?line [102233154,19716,102133857,4532024,123369135,24565730,109558721|_] =
+ bit_level_all_different(fun erlang:phash2/2),
+
+ ?line 13233341 = test_hash_phash(<<42:7>>, 16#7FFFFFF),
+ ?line 79121243 = test_hash_phash(<<99:7>>, 16#7FFFFFF),
+ ?line 95517726 = test_hash_phash(<<16#378ABF73:31>>, 16#7FFFFFF),
+
+ ?line 64409098 = test_phash2(<<99:7>>, 16#7FFFFFF),
+ ?line 55555814 = test_phash2(<<123,19:2>>, 16#7FFFFFF),
+ ?line 83868582 = test_phash2(<<123,45,6:3>>, 16#7FFFFFF),
+ ?line 2123204 = test_phash2(<<123,45,7:3>>, 16#7FFFFFF),
+
+ ok.
+
+bit_level_all_different(Hash) ->
+ {name,Name} = erlang:fun_info(Hash, name),
+ Seq = lists:seq(1, 32),
+ Hashes0 = [Hash(<<1:Sz>>, 16#7FFFFFF) || Sz <- Seq],
+ io:format("~p/2 ~p", [Name,Hashes0]),
+ Hashes0 = [Hash(unaligned_sub_bitstr(<<1:Sz>>), 16#7FFFFFF) || Sz <- Seq],
+ 32 = length(lists:usort(Hashes0)),
+
+ Hashes1 = [Hash(<<(1 bsl (Sz-1)):Sz>>, 16#7FFFFFF) || Sz <- Seq],
+ io:format("~p/2 ~p", [Name,Hashes1]),
+ Hashes1 = [Hash(unaligned_sub_bitstr(<<(1 bsl (Sz-1)):Sz>>), 16#7FFFFFF) ||
+ Sz <- Seq],
+ 32 = length(lists:usort(Hashes1)),
+
+ Hashes2 = [Hash(<<0:Sz>>, 16#7FFFFFF) || Sz <- Seq],
+ io:format("~p/2 ~p", [Name,Hashes2]),
+ Hashes2 = [Hash(unaligned_sub_bitstr(<<0:Sz>>), 16#7FFFFFF) || Sz <- Seq],
+ 32 = length(lists:usort(Hashes2)),
+
+ Hashes1.
+
+test_hash_phash(Bitstr, Rem) ->
+ Hash = erlang:hash(Bitstr, Rem),
+ Hash = erlang:phash(Bitstr, Rem),
+ Hash = erlang:hash(unaligned_sub_bitstr(Bitstr), Rem),
+ Hash = erlang:phash(unaligned_sub_bitstr(Bitstr), Rem).
+
+test_phash2(Bitstr, Rem) ->
+ Hash = erlang:phash2(Bitstr, Rem),
+ Hash = erlang:phash2(unaligned_sub_bitstr(Bitstr), Rem).
+
+otp_7127_test() ->
+ %% Used to return 2589127136.
+ ?line 38990304 = erlang:phash2(<<"Scott9">>),
+ ok.
+
+%%
+%% Reference implementation of integer hashing
+%%
+
+%%
+%% These are primes just above 2^28 that will never be changed, they are also in
+%% utils.c.
+%%
+-define(FN2,268439161).
+-define(FN3,268435459).
+-define(FN4,268436141).
+
+make_hash(N,M) ->
+ Prime1 = ?FN2,
+ {Prime2, BL0} = to_bytes(N),
+ BL = pad(BL0),
+ (integer_hash(BL, Prime1, Prime2) rem M) + 1.
+
+to_bytes(N) when N < 0 ->
+ {?FN4,to_bytes(-N,[])};
+to_bytes(N) ->
+ {?FN3,to_bytes(N,[])}.
+to_bytes(0,Acc) ->
+ Acc;
+to_bytes(N,Acc) ->
+ to_bytes(N bsr 8, [N band 16#FF | Acc]).
+
+pad([]) ->
+ [0,0,0,0];
+pad(L) ->
+ case 4 - (length(L) rem 4) of
+ 4 ->
+ L;
+ N ->
+ lists:duplicate(N,0) ++ L
+ end.
+
+integer_hash(BL,P1,P2) ->
+ (do_ihash(0,lists:reverse(BL),P1) * P2) band 16#FFFFFFFF.
+
+do_ihash(Hash,[],_) ->
+ Hash;
+do_ihash(Hash, [H|T], P) ->
+ do_ihash((((Hash * P) band 16#FFFFFFFF) + H) band 16#FFFFFFFF, T, P).
+
+
+
+
+%%
+%% Utilities for the test of "spreading"
+%%
+-ifdef(debug).
+hex(N) ->
+ hex(0,N,[]).
+hex(X,0,Acc) when X >= 8 ->
+ [$0, $x | Acc];
+hex(X,N,Acc) ->
+ hex(X+1,N bsr 4, [trans(N band 16#F) | Acc]).
+
+trans(N) when N < 10 ->
+ N + $0;
+trans(10) ->
+ $A;
+trans(11) ->
+ $B;
+trans(12) ->
+ $C;
+trans(13) ->
+ $D;
+trans(14) ->
+ $E;
+trans(15) ->
+ $F.
+-endif.
+
+gen_keys(N, Template, BP,Fun) ->
+ Ratio = (1 bsl (BP * 8)),
+ Low = Template + Ratio,
+ High = Template + (N*Ratio),
+ ?dbgformat("N = ~p, BP = ~p, Template = ~p, Low = ~s, High = ~s~n",
+ [hex(N),hex(BP),hex(Template),hex(Low),hex(High-1)]),
+ Fun(Template),
+ gen_keys2(Low, High,Ratio,Fun).
+
+gen_keys2(High,High2,_,_) when High >= High2 ->
+ [];
+gen_keys2(Low,High,R,Fun) ->
+ Fun(Low),
+ gen_keys2(Low + R,High,R,Fun).
+
+test_fun(N,{HM,HF}, Template, Fun) ->
+ init_table(),
+ test_fun_1(0,1,N+1,{HM,HF},Template,Fun).
+
+test_fun_1(_,To,To,_,_,_) ->
+ ok;
+test_fun_1(A,X,To,Y,Z,W) when A > To ->
+ ?dbgformat("~p:~p(~p,~p,~p,~p,~p,~p)~n",[?MODULE,test_fun_1,To,X,To,Y,Z,W]),
+ test_fun_1(0,X+1,To,Y,Z,W);
+test_fun_1(Pos,Siz,To,{HM,HF},Template,Fun) when 1 bsl (Siz*8) =< 65536 ->
+ io:format("Byte: ~p, Size: ~p~n",[Pos,Siz]),
+ N = 1 bsl (Siz*8),
+ gen_keys(N,Template,Pos,fun (X) ->
+ P = HM:HF(Fun(X),N),
+ ets:insert(?MODULE,{P})
+ end
+ ),
+ Hits = collect_hits(),
+ io:format(
+ "Hashing of ~p values spread over ~p buckets~n",
+ [N,Hits]),
+ case (N div Hits) > 2 of
+ true ->
+ exit({not_spread_enough, Hits, on, N});
+ _ ->
+ test_fun_1(Pos + Siz, Siz, To,{HM,HF},Template,Fun)
+ end;
+test_fun_1(_,_,_,_,_,_) ->
+ ok.
+
+init_table() ->
+ (catch ets:delete(?MODULE)),
+ ets:new(?MODULE,[ordered_set,named_table]).
+
+collect_hits() ->
+ N = ets:info(?MODULE,size),
+ init_table(),
+ N.
+
+integer_to_binary(N) ->
+ list_to_binary(lists:reverse(integer_to_bytelist(N,[]))).
+
+integer_to_bytelist(0,Acc) ->
+ Acc;
+integer_to_bytelist(N,Acc) ->
+ integer_to_bytelist(N bsr 8, [N band 16#FF | Acc]).
+
+unaligned_sub_bin(Bin0) when is_binary(Bin0) ->
+ Bin1 = <<42:6,Bin0/binary,3:2>>,
+ Sz = size(Bin0),
+ <<42:6,Bin:Sz/binary,3:2>> = id(Bin1),
+ Bin.
+
+unaligned_sub_bitstr(Bin0) when is_bitstring(Bin0) ->
+ Bin1 = <<(-1):4,Bin0/bits,(-1):64>>,
+ Bits = bit_size(Bin0),
+ <<_:4,Bin:Bits/bits,_:64>> = id(Bin1),
+ Bin.
+
+id(I) -> I.
+