%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2000-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at %% %% http://www.apache.org/licenses/LICENSE-2.0 %% %% Unless required by applicable law or agreed to in writing, software %% distributed under the License is distributed on an "AS IS" BASIS, %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. %% %% %CopyrightEnd% %% %% %% Verifying erlang:phash/2. And now also phash2/2, to some extent. %% Test the hashing algorithm for integer numbers in 2 ways: %% 1 Test that numbers in diferent sequences get sufficiently spread %% in a "bit pattern" way (modulo 256 etc). %% 2 Test that numbers are correctly hashed compared to a reference implementation, %% regardless of their internal representation. The hashing algorithm should never %% change. %% The hashing of other datatypes is tested with a few samples, so that we are sure %% it does not change across versions. %% Also tests that the limit can be between 0 and 16#FFFFFFFF. %% -module(hash_SUITE). -export([basic_test/0,cmp_test/1,range_test/0,spread_test/1, phash2_test/0, otp_5292_test/0, otp_7127_test/0, run_phash2_benchmarks/0, test_phash2_binary_aligned_and_unaligned_equal/1, test_phash2_4GB_plus_bin/1, test_phash2_10MB_plus_bin/1, test_phash2_large_map/1, test_phash2_shallow_long_list/1, test_phash2_deep_list/1, test_phash2_deep_tuple/1, test_phash2_deep_tiny/1, test_phash2_with_42/1, test_phash2_with_short_tuple/1, test_phash2_with_short_list/1, test_phash2_with_tiny_bin/1, test_phash2_with_tiny_unaligned_sub_binary/1, test_phash2_with_small_unaligned_sub_binary/1, test_phash2_with_large_bin/1, test_phash2_with_large_unaligned_sub_binary/1, test_phash2_with_super_large_unaligned_sub_binary/1]). %% %% Define to run outside of test server %% %-define(STANDALONE,1). %% %% Define for debug output %% -define(debug,1). -ifdef(STANDALONE). -define(config(A,B),config(A,B)). -record(event, {name, data}). -export([config/2]). -else. -include_lib("common_test/include/ct.hrl"). -include_lib("common_test/include/ct_event.hrl"). -endif. -ifdef(debug). -ifdef(STANDALONE). -define(line, erlang:display({?MODULE,?LINE}), ). -endif. -define(dbgformat(A,B),io:format(A,B)). -else. -ifdef(STANDALONE). -define(line, noop, ). -endif. -define(dbgformat(A,B),noop). -endif. -ifdef(STANDALONE). config(priv_dir,_) -> ".". notify(X) -> erlang:display(X). -else. %% When run in test server. -export([groups/0, all/0, suite/0, test_basic/1,test_cmp/1,test_range/1,test_spread/1, test_phash2/1,otp_5292/1,bit_level_binaries/1,otp_7127/1, test_hash_zero/1, init_per_suite/1, end_per_suite/1, init_per_group/2, end_per_group/2]). suite() -> [{ct_hooks,[ts_install_cth]}, {timetrap, {minutes, 10}}]. all() -> [test_basic, test_cmp, test_range, test_spread, test_phash2, otp_5292, bit_level_binaries, otp_7127, test_hash_zero, test_phash2_binary_aligned_and_unaligned_equal, test_phash2_4GB_plus_bin, test_phash2_10MB_plus_bin, {group, phash2_benchmark_tests}, {group, phash2_benchmark}]. get_phash2_benchmarks() -> [ test_phash2_large_map, test_phash2_shallow_long_list, test_phash2_deep_list, test_phash2_deep_tuple, test_phash2_deep_tiny, test_phash2_with_42, test_phash2_with_short_tuple, test_phash2_with_short_list, test_phash2_with_tiny_bin, test_phash2_with_tiny_unaligned_sub_binary, test_phash2_with_small_unaligned_sub_binary, test_phash2_with_large_bin, test_phash2_with_large_unaligned_sub_binary, test_phash2_with_super_large_unaligned_sub_binary ]. groups() -> [ { phash2_benchmark_tests, [], get_phash2_benchmarks() }, { phash2_benchmark, [], get_phash2_benchmarks() } ]. init_per_suite(Config) -> io:format("START APPS~n"), A0 = case application:start(sasl) of ok -> [sasl]; _ -> [] end, A = case application:start(os_mon) of ok -> [os_mon|A0]; _ -> A0 end, io:format("APPS STARTED~n"), [{started_apps, A}|Config]. end_per_suite(Config) -> As = proplists:get_value(started_apps, Config), lists:foreach(fun (A) -> application:stop(A) end, As), Config. init_per_group(phash2_benchmark_tests, Config) -> [phash2_benchmark_tests |Config]; init_per_group(_, Config) -> Config. end_per_group(_, Config) -> Config. %% Tests basic functionality of erlang:phash and that the %% hashes has not changed (neither hash nor phash) test_basic(Config) when is_list(Config) -> basic_test(). %% Compares integer hashes made by erlang:phash with those of a reference implementation test_cmp(Config) when is_list(Config) -> cmp_test(10000). %% Tests ranges on erlang:phash from 1 to 2^32 test_range(Config) when is_list(Config) -> range_test(). %% Tests that the hashes are spread ok test_spread(Config) when is_list(Config) -> spread_test(10). %% Tests phash2 test_phash2(Config) when is_list(Config) -> phash2_test(). %% Tests hash, phash and phash2 regarding integers. otp_5292(Config) when is_list(Config) -> otp_5292_test(). %% Test hashing bit-level binaries. bit_level_binaries(Config) when is_list(Config) -> bit_level_binaries_do(). %% Tests phash2/1. otp_7127(Config) when is_list(Config) -> otp_7127_test(). test_hash_zero(Config) when is_list(Config) -> hash_zero_test(). notify(X) -> ct_event:notify(X). -endif. %% %% Here are the real tests, they can be run without test_server, %% define -DSTANDALONE when compiling. %% basic_test() -> 685556714 = erlang:phash({a,b,c},16#FFFFFFFF), 37442646 = erlang:phash([a,b,c,{1,2,3},c:pid(0,2,3), 16#77777777777777],16#FFFFFFFF), ExternalReference = <<131,114,0,3,100,0,13,110,111,110,111,100,101,64, 110,111,104,111,115,116,0,0,0,0,122,0,0,0,0,0,0,0,0>>, ExternalReference = <<131,114,0,3,100,0,13,110,111,110,111,100,101,64, 110,111,104,111,115,116,0,0,0,0,122,0,0,0,0,0,0,0,0>>, 1113403635 = phash_from_external(ExternalReference), ExternalFun = <<131,112,0,0,0,70,1,212,190,220,28,179,144,194,131, 19,215,105,97,77,251,125,93,0,0,0,0,0,0,0,2,100,0,1, 116,97,0,98,6,165,246,224,103,100,0,13,110,111, 110,111,100,101,64,110,111,104,111,115,116,0,0,0,91, 0,0,0,0,0,97,2,97,1>>, 25769064 = phash_from_external(ExternalFun), case (catch erlang:phash(1,0)) of {'EXIT',{badarg, _}} -> ok; _ -> exit(phash_accepted_zero_as_range) end. phash_from_external(Ext) -> erlang:phash(binary_to_term(Ext), 16#FFFFFFFF). range_test() -> F = fun(From,From,_FF) -> ok; (From,To,FF) -> R = rand:uniform(16#FFFFFFFFFFFFFFFF), X = erlang:phash(R, From), Y = erlang:phash(R, 16#100000000) - 1, Z = (Y rem From) + 1, case X =:= Z of true -> FF(From*2,To,FF); _ -> exit({range_test_failed, hash_on, R, range, From}) end end, F(1,16#100000000,F). spread_test(N) -> test_fun(N,{erlang,phash},16#50000000000,fun(X) -> X end), test_fun(N,{erlang,phash},0,fun(X) -> X end), test_fun(N,{erlang,phash},16#123456789ABCDEF123456789ABCDEF,fun(X) -> X end), test_fun(N,{erlang,phash},16#50000000000,fun(X) -> integer_to_list(X) end), test_fun(N,{erlang,phash},16#50000000000,fun(X) -> integer_to_bytelist(X,[]) end), test_fun(N,{erlang,phash},16#50000000000,fun(X) -> integer_to_binary_value(X) end). cmp_test(N) -> do_cmp_hashes(N,8). do_cmp_hashes(0,_) -> ok; do_cmp_hashes(N,Steps) -> R0 = rand:uniform(1 bsl Steps - 1) + rand:uniform(16#FFFFFFFF), R = case rand:uniform(2) of 1 -> R0; _ -> -R0 end, NSteps = case N rem 10 of 0 -> case (Steps + 8) rem 1024 of 0 -> 8; OK -> OK end; _ -> Steps end, X = erlang:phash(R,16#FFFFFFFF), Y = make_hash(R,16#FFFFFFFF), case X =:= Y of true -> do_cmp_hashes(N - 1, NSteps); _ -> exit({missmatch_on_input, R, phash, X, make_hash, Y}) end. phash2_test() -> Max = 1 bsl 32, BPort = <<131,102,100,0,13,110,111,110,111,100,101,64,110,111,104, 111,115,116,0,0,0,1,0>>, Port = binary_to_term(BPort), BXPort = <<131,102,100,0,11,97,112,97,64,108,101,103,111,108,97,115, 0,0,0,24,3>>, XPort = binary_to_term(BXPort), BRef = <<131,114,0,3,100,0,13,110,111,110,111,100,101,64,110,111,104, 111,115,116,0,0,0,1,255,0,0,0,0,0,0,0,0>>, Ref = binary_to_term(BRef), BXRef = <<131,114,0,3,100,0,11,97,112,97,64,108,101,103,111,108,97,115, 2,0,0,0,155,0,0,0,0,0,0,0,0>>, XRef = binary_to_term(BXRef), BXPid = <<131,103,100,0,11,97,112,97,64,108,101,103,111,108,97,115, 0,0,0,36,0,0,0,0,1>>, XPid = binary_to_term(BXPid), %% X = f1(), Y = f2(), Z = f3(X, Y), %% F1 = fun f1/0, % -> abc B1 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98, 13,196,76,242,0,0,0,1,0,0,0,0,100,0,1,116,97,1,98,2,195,126, 58,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111, 115,116,0,0,0,112,0,0,0,0,0>>, F1 = binary_to_term(B1), %% F2 = fun f2/0, % -> abd B2 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98, 13,196,76,242,0,0,0,2,0,0,0,0,100,0,1,116,97,2,98,3,130,152, 185,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111, 115,116,0,0,0,112,0,0,0,0,0>>, F2 = binary_to_term(B2), %% F3 = fun f3/2, % -> {abc, abd} B3 = <<131,112,0,0,0,66,2,215,206,77,69,249,50,170,17,129,47,21,98, 13,196,76,242,0,0,0,3,0,0,0,0,100,0,1,116,97,3,98,7,168,160, 93,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111, 115,116,0,0,0,112,0,0,0,0,0>>, F3 = binary_to_term(B3), %% F4 = fun () -> 123456789012345678901234567 end, B4 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98, 13,196,76,242,0,0,0,4,0,0,0,0,100,0,1,116,97,4,98,2,230,21, 171,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111, 115,116,0,0,0,112,0,0,0,0,0>>, F4 = binary_to_term(B4), %% F5 = fun() -> {X,Y,Z} end, B5 = <<131,112,0,0,0,92,0,215,206,77,69,249,50,170,17,129,47,21,98, 13,196,76,242,0,0,0,5,0,0,0,3,100,0,1,116,97,5,98,0,99,101, 130,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111, 115,116,0,0,0,112,0,0,0,0,0,100,0,3,97,98,99,100,0,3,97,98, 100,104,2,100,0,3,97,98,99,100,0,3,97,98,100>>, F5 = binary_to_term(B5), Chars = lists:seq(32,127), NotAHeapBin = list_to_binary(lists:flatten(lists:duplicate(500,Chars))), <<_:128,SubBin/binary>> = NotAHeapBin, L = [%% nil {[], 3468870702}, %% atom :( not very good ): %% (cannot use block_hash due to compatibility issues...) {abc,26499}, {abd,26500}, {'åäö', 62518}, %% 81 runes as an atom, 'ᚠᚡᚢᚣᚤᚥᚦᚧᚨᚩᚪᚫᚬᚭᚮᚯᚰᚱᚲᚳᚴᚵᚶᚷᚸᚹᚺᚻᚼᚽᚾᚿᛀᛁᛂᛃᛄᛅᛆᛇᛈᛉᛊᛋᛌᛍᛎᛏᛐᛑᛒᛓᛔᛕᛖᛗᛘᛙᛚᛛᛜᛝᛞᛟᛠᛡᛢᛣᛤᛥᛦᛧᛨᛩᛪ᛫᛬᛭ᛮᛯᛰ' {erlang:binary_to_term(<<131, 118, 0, 243, (unicode:characters_to_binary(lists:seq(5792, 5872)))/binary >>), 241561024}, %% åäö dynamic {erlang:binary_to_term(<<131, 118, 0, 6, 195, 165, 195, 164, 195, 182>>),62518}, %% the atom '゙゚゛゜ゝゞゟ゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズ' {erlang:binary_to_term(<<131, 118, 0, 102, (unicode:characters_to_binary(lists:seq(12441, 12542)))/binary>>), 246053818}, %% the atom, '😃' {erlang:binary_to_term(<<131, 118, 0, 4, 240, 159, 152, 131>>), 1026307}, %% small {0,3175731469}, {1, 539485162}, {-1, 1117813597}, {1 bsl 20, 1477815345}, {-(1 bsl 20), 3076904293}, %% bignum {4294967296, 2108323275}, {-4294967296, 2586067094}, {981494972710656, 1622082818}, {-981494972710656, 3367191372}, {36893488147419103232, 2545846594}, {-36893488147419103232, 1649047068}, {1606938044258990275541962092341162602522202993782792835301376, 2573322433}, {-1606938044258990275541962092341162602522202993782792835301376, 2288753377}, %% binary {<<>>, 147926629}, {<<0:8>>, 2914887855}, {<<0:32>>, 2014511533}, {<<"abc">>, 1306188027}, {<<"12345678901234567890">>, 3021061640}, {NotAHeapBin,2644086993}, {SubBin,3575839236}, %% unaligned sub binaries {unaligned_sub_bin(<<>>), 147926629}, {unaligned_sub_bin(<<0:8>>), 2914887855}, {unaligned_sub_bin(<<0:32>>), 2014511533}, {unaligned_sub_bin(<<"abc">>), 1306188027}, {unaligned_sub_bin(<<"12345678901234567890">>), 3021061640}, {unaligned_sub_bin(NotAHeapBin),2644086993}, {unaligned_sub_bin(SubBin),3575839236}, %% bit-level binaries {<<0:7>>, 1055790816}, {(fun()-> B = <<255,7:3>>, <<_:4,D/bitstring>> = B, D end)(), 911751529}, {<<"abc",13:4>>, 670412287}, {<<5:3,"12345678901234567890">>, 289973273}, %% fun {F1, 3826013332}, {F2, 126009152}, {F3, 3482452479}, {F4, 633704783}, {F5, 1241537408}, %% module fun {fun lists:map/2, 840287883}, {fun lists:map/3, 2318478565}, {fun lists:filter/2, 635165125}, {fun lists:filter/3, 3824649396}, {fun xxx:map/2, 2630071865}, {fun xxx:map/3, 4237970519}, %% pid {c:pid(0,0,0), 2858162279}, {c:pid(0,1,0), 2870503209}, {c:pid(0,2,0), 1707788908}, {XPid, 1290188489}, %% port {Port,1954394636}, {XPort,274735}, %% ref {Ref, 1675501484}, {XRef, 3845846926}, %% float {0.0, 423528920}, {3.14, 3731709215}, {-3.14, 1827518724}, %% list {[0.0], 167906877}, {[{}], 4050867804}, {[<<>>], 440873397}, {[[]], 499070068}, {[abc], 3112446404}, {[a,b,c], 1505666924}, {[a,b|c], 433753489}, {"abc", 519996486}, {"abc"++[1009], 290369864}, {"abc"++[1009]++"de", 4134369195}, {"1234567890123456", 963649519}, %% tuple {{}, 221703996}, {{{}}, 2165044361}, {{<<>>}, 682464809}, {{0.0}, 688441152}, {{[]}, 1775079505}, {{abc}, 2032039329}, {{a,1,{},-3.14}, 1364396939}, {{c:pid(0,2,0)}, 686997880}, {{F4}, 2279632930}, {{a,<<>>}, 2724468891}, {{b,<<>>}, 2702508511} ], SpecFun = fun(S) -> sofs:no_elements(S) > 1 end, F = sofs:relation_to_family(sofs:converse(sofs:relation(L))), D = sofs:to_external(sofs:family_specification(SpecFun, F)), [] = D, [] = [{E,H,H2} || {E,H} <- L, (H2 = erlang:phash2(E, Max)) =/= H], ok. test_phash2_binary_aligned_and_unaligned_equal(Config) when is_list(Config) -> erts_debug:set_internal_state(available_internal_state, true), test_aligned_and_unaligned_equal_up_to(256*12+255), erts_debug:set_internal_state(available_internal_state, false). test_aligned_and_unaligned_equal_up_to(BinSize) -> Results = lists:map(fun(Size) -> test_aligned_and_unaligned_equal(Size) end, lists:seq(1, BinSize)), %% DataDir = filename:join(filename:dirname(code:which(?MODULE)), "hash_SUITE_data"), %% ExpResFile = filename:join(DataDir, "phash2_bin_expected_results.txt"), %% {ok, [ExpRes]} = file:consult(ExpResFile), %% %% ok = file:write_file(ExpResFile, io_lib:format("~w.~n", [Results])), %% Results = ExpRes, 110469206 = erlang:phash2(Results). test_aligned_and_unaligned_equal(BinSize) -> Bin = make_random_bin(BinSize), LastByte = last_byte(Bin), LastInBitstring = LastByte rem 11, Bitstring = << Bin/binary, <>/bitstring >>, UnalignedBin = make_unaligned_sub_bitstring(Bin), UnalignedBitstring = make_unaligned_sub_bitstring(Bitstring), case erts_debug:get_internal_state(available_internal_state) of false -> erts_debug:set_internal_state(available_internal_state, true); _ -> ok end, erts_debug:set_internal_state(reds_left, 3), BinHash = erlang:phash2(Bin), BinHash = erlang:phash2(Bin), erts_debug:set_internal_state(reds_left, 3), UnalignedBinHash = erlang:phash2(UnalignedBin), UnalignedBinHash = erlang:phash2(UnalignedBin), BinHash = UnalignedBinHash, erts_debug:set_internal_state(reds_left, 3), BitstringHash = erlang:phash2(Bitstring), BitstringHash = erlang:phash2(Bitstring), erts_debug:set_internal_state(reds_left, 3), UnalignedBitstringHash = erlang:phash2(UnalignedBitstring), UnalignedBitstringHash = erlang:phash2(UnalignedBitstring), BitstringHash = UnalignedBitstringHash, {BinHash, BitstringHash}. last_byte(Bin) -> NotLastByteSize = (erlang:bit_size(Bin)) - 8, <<_:NotLastByteSize/bitstring, LastByte:8>> = Bin, LastByte. test_phash2_4GB_plus_bin(Config) when is_list(Config) -> run_when_enough_resources( fun() -> erts_debug:set_internal_state(available_internal_state, true), %% Created Bin4GB here so it only needs to be created once erts_debug:set_internal_state(force_gc, self()), Bin4GB = get_4GB_bin(), test_phash2_plus_bin_helper1(Bin4GB, <<>>, <<>>, 13708901), erts_debug:set_internal_state(force_gc, self()), test_phash2_plus_bin_helper1(Bin4GB, <<>>, <<3:5>>, 66617678), erts_debug:set_internal_state(force_gc, self()), test_phash2_plus_bin_helper1(Bin4GB, <<13>>, <<>>, 31308392), erts_debug:set_internal_state(force_gc, self()), erts_debug:set_internal_state(available_internal_state, false) end). test_phash2_10MB_plus_bin(Config) when is_list(Config) -> erts_debug:set_internal_state(available_internal_state, true), erts_debug:set_internal_state(force_gc, self()), Bin10MB = get_10MB_bin(), test_phash2_plus_bin_helper1(Bin10MB, <<>>, <<>>, 22776267), erts_debug:set_internal_state(force_gc, self()), test_phash2_plus_bin_helper1(Bin10MB, <<>>, <<3:5>>, 124488972), erts_debug:set_internal_state(force_gc, self()), test_phash2_plus_bin_helper1(Bin10MB, <<13>>, <<>>, 72958346), erts_debug:set_internal_state(force_gc, self()), erts_debug:set_internal_state(available_internal_state, false). get_10MB_bin() -> TmpBin = make_random_bin(10239), Bin = erlang:iolist_to_binary([0, TmpBin]), IOList10MB = duplicate_iolist(Bin, 10), Bin10MB = erlang:iolist_to_binary(IOList10MB), 10485760 = size(Bin10MB), Bin10MB. get_4GB_bin() -> TmpBin = make_random_bin(65535), Bin = erlang:iolist_to_binary([0, TmpBin]), IOList4GB = duplicate_iolist(Bin, 16), Bin4GB = erlang:iolist_to_binary(IOList4GB), 4294967296 = size(Bin4GB), Bin4GB. duplicate_iolist(IOList, 0) -> IOList; duplicate_iolist(IOList, NrOfTimes) -> duplicate_iolist([IOList, IOList], NrOfTimes - 1). test_phash2_plus_bin_helper1(Bin4GB, ExtraBytes, ExtraBits, ExpectedHash) -> test_phash2_plus_bin_helper2(Bin4GB, fun id/1, ExtraBytes, ExtraBits, ExpectedHash), test_phash2_plus_bin_helper2(Bin4GB, fun make_unaligned_sub_bitstring/1, ExtraBytes, ExtraBits, ExpectedHash). test_phash2_plus_bin_helper2(Bin, TransformerFun, ExtraBytes, ExtraBits, ExpectedHash) -> ExtraBitstring = << ExtraBytes/binary, ExtraBits/bitstring >>, LargerBitstring = << ExtraBytes/binary, ExtraBits/bitstring, Bin/bitstring >>, LargerTransformedBitstring = TransformerFun(LargerBitstring), ExtraBitstringHash = erlang:phash2(ExtraBitstring), ExpectedHash = case size(LargerTransformedBitstring) < 4294967296 of true -> erts_debug:set_internal_state(force_gc, self()), erts_debug:set_internal_state(reds_left, 1), Hash = erlang:phash2(LargerTransformedBitstring), Hash = erlang:phash2(LargerTransformedBitstring), Hash; false -> erts_debug:set_internal_state(force_gc, self()), erts_debug:set_internal_state(reds_left, 1), ExtraBitstringHash = erlang:phash2(LargerTransformedBitstring), ExtraBitstringHash = erlang:phash2(LargerTransformedBitstring), ExtraBitstringHash end. run_when_enough_resources(Fun) -> case {total_memory(), erlang:system_info(wordsize)} of {Mem, 8} when is_integer(Mem) andalso Mem >= 31 -> Fun(); {Mem, WordSize} -> {skipped, io_lib:format("Not enough resources (System Memory >= ~p, Word Size = ~p)", [Mem, WordSize])} end. %% Total memory in GB total_memory() -> try MemoryData = memsup:get_system_memory_data(), case lists:keysearch(total_memory, 1, MemoryData) of {value, {total_memory, TM}} -> TM div (1024*1024*1024); false -> {value, {system_total_memory, STM}} = lists:keysearch(system_total_memory, 1, MemoryData), STM div (1024*1024*1024) end catch _ : _ -> undefined end. -ifdef(FALSE). f1() -> abc. f2() -> abd. f3(X, Y) -> {X, Y}. -endif. otp_5292_test() -> PH = fun(E) -> EInList = [1, 2, 3, E], EInList2 = [E, 1, 2, 3], NegEInList = [1, 2, 3, -E], NegEInList2 = [-E, 1, 2, 3], [erlang:phash(E, 1 bsl 32), erlang:phash(-E, 1 bsl 32), erlang:phash2(E, 1 bsl 32), erlang:phash2(-E, 1 bsl 32), erlang:phash2(EInList, 1 bsl 32), erlang:phash2(EInList2, 1 bsl 32), erlang:phash2(NegEInList, 1 bsl 32), erlang:phash2(NegEInList2, 1 bsl 32)] end, S2 = md5([md5(hash_int(S, E, PH)) || {Start, N, Sz} <- d(), {S, E} <- int(Start, N, Sz)]), <<234,63,192,76,253,57,250,32,44,11,73,1,161,102,14,238>> = S2, ok. d() -> [%% Start, NumOfIntervals, SizeOfInterval {(1 bsl I)-100, 2, 100} || I <- lists:seq(1, 1000)]. int(Start, N, Sz) -> {_, R} = lists:mapfoldl(fun(S, Acc) -> {S + Sz, [{S,S+Sz-1} | Acc]} end, [], lists:seq(Start, Start+(N-1)*Sz, Sz)), lists:reverse(R). hash_int(Start, End, F) -> HL = lists:flatmap(fun(E) -> F(E) end, lists:seq(Start, End)), {Start, End, md5(HL)}. md5(T) -> erlang:md5(term_to_binary(T)). bit_level_binaries_do() -> [3511317,7022633,14044578,28087749,56173436,112344123,90467083|_] = bit_level_all_different(fun erlang:phash/2), [102233154,19716,102133857,4532024,123369135,24565730,109558721|_] = bit_level_all_different(fun erlang:phash2/2), 13233341 = test_hash_phash(<<42:7>>, 16#7FFFFFF), 79121243 = test_hash_phash(<<99:7>>, 16#7FFFFFF), 95517726 = test_hash_phash(<<16#378ABF73:31>>, 16#7FFFFFF), 64409098 = test_phash2(<<99:7>>, 16#7FFFFFF), 55555814 = test_phash2(<<123,19:2>>, 16#7FFFFFF), 83868582 = test_phash2(<<123,45,6:3>>, 16#7FFFFFF), 2123204 = test_phash2(<<123,45,7:3>>, 16#7FFFFFF), ok. bit_level_all_different(Hash) -> {name,Name} = erlang:fun_info(Hash, name), Seq = lists:seq(1, 32), Hashes0 = [Hash(<<1:Sz>>, 16#7FFFFFF) || Sz <- Seq], io:format("~p/2 ~p", [Name,Hashes0]), Hashes0 = [Hash(unaligned_sub_bitstr(<<1:Sz>>), 16#7FFFFFF) || Sz <- Seq], 32 = length(lists:usort(Hashes0)), Hashes1 = [Hash(<<(1 bsl (Sz-1)):Sz>>, 16#7FFFFFF) || Sz <- Seq], io:format("~p/2 ~p", [Name,Hashes1]), Hashes1 = [Hash(unaligned_sub_bitstr(<<(1 bsl (Sz-1)):Sz>>), 16#7FFFFFF) || Sz <- Seq], 32 = length(lists:usort(Hashes1)), Hashes2 = [Hash(<<0:Sz>>, 16#7FFFFFF) || Sz <- Seq], io:format("~p/2 ~p", [Name,Hashes2]), Hashes2 = [Hash(unaligned_sub_bitstr(<<0:Sz>>), 16#7FFFFFF) || Sz <- Seq], 32 = length(lists:usort(Hashes2)), Hashes1. test_hash_phash(Bitstr, Rem) -> Hash = erlang:phash(Bitstr, Rem), Hash = erlang:phash(unaligned_sub_bitstr(Bitstr), Rem). test_phash2(Bitstr, Rem) -> Hash = erlang:phash2(Bitstr, Rem), Hash = erlang:phash2(unaligned_sub_bitstr(Bitstr), Rem). otp_7127_test() -> %% Used to return 2589127136. 38990304 = erlang:phash2(<<"Scott9">>), ok. hash_zero_test() -> Zs = [0.0, -0.0, 0/-1, 0.0/-1, 0/-(1 bsl 65), binary_to_term(<<131,70,0,0,0,0,0,0,0,0>>), %% +0.0 binary_to_term(<<131,70,128,0,0,0,0,0,0,0>>)], %% -0.0 ok = hash_zero_test(Zs,fun(T) -> erlang:phash2(T, 1 bsl 32) end), ok = hash_zero_test(Zs,fun(T) -> erlang:phash(T, 1 bsl 32) end), ok. hash_zero_test([Z|Zs],F) -> hash_zero_test(Zs,Z,F(Z),F). hash_zero_test([Z|Zs],Z0,V,F) -> true = Z0 =:= Z, %% assert exact equal Z0 = Z, %% assert matching V = F(Z), %% assert hash hash_zero_test(Zs,Z0,V,F); hash_zero_test([],_,_,_) -> ok. %% %% Reference implementation of integer hashing %% %% %% These are primes just above 2^28 that will never be changed, they are also in %% utils.c. %% -define(FN2,268439161). -define(FN3,268435459). -define(FN4,268436141). make_hash(N,M) -> Prime1 = ?FN2, {Prime2, BL0} = to_bytes(N), BL = pad(BL0), (integer_hash(BL, Prime1, Prime2) rem M) + 1. to_bytes(N) when N < 0 -> {?FN4,to_bytes(-N,[])}; to_bytes(N) -> {?FN3,to_bytes(N,[])}. to_bytes(0,Acc) -> Acc; to_bytes(N,Acc) -> to_bytes(N bsr 8, [N band 16#FF | Acc]). pad([]) -> [0,0,0,0]; pad(L) -> case 4 - (length(L) rem 4) of 4 -> L; N -> lists:duplicate(N,0) ++ L end. integer_hash(BL,P1,P2) -> (do_ihash(0,lists:reverse(BL),P1) * P2) band 16#FFFFFFFF. do_ihash(Hash,[],_) -> Hash; do_ihash(Hash, [H|T], P) -> do_ihash((((Hash * P) band 16#FFFFFFFF) + H) band 16#FFFFFFFF, T, P). %% %% Utilities for the test of "spreading" %% -ifdef(debug). hex(N) -> hex(0,N,[]). hex(X,0,Acc) when X >= 8 -> [$0, $x | Acc]; hex(X,N,Acc) -> hex(X+1,N bsr 4, [trans(N band 16#F) | Acc]). trans(N) when N < 10 -> N + $0; trans(10) -> $A; trans(11) -> $B; trans(12) -> $C; trans(13) -> $D; trans(14) -> $E; trans(15) -> $F. -endif. gen_keys(N, Template, BP,Fun) -> Ratio = (1 bsl (BP * 8)), Low = Template + Ratio, High = Template + (N*Ratio), ?dbgformat("N = ~p, BP = ~p, Template = ~p, Low = ~s, High = ~s~n", [hex(N),hex(BP),hex(Template),hex(Low),hex(High-1)]), Fun(Template), gen_keys2(Low, High,Ratio,Fun). gen_keys2(High,High2,_,_) when High >= High2 -> []; gen_keys2(Low,High,R,Fun) -> Fun(Low), gen_keys2(Low + R,High,R,Fun). test_fun(N,{HM,HF}, Template, Fun) -> init_table(), test_fun_1(0,1,N+1,{HM,HF},Template,Fun). test_fun_1(_,To,To,_,_,_) -> ok; test_fun_1(A,X,To,Y,Z,W) when A > To -> ?dbgformat("~p:~p(~p,~p,~p,~p,~p,~p)~n",[?MODULE,test_fun_1,To,X,To,Y,Z,W]), test_fun_1(0,X+1,To,Y,Z,W); test_fun_1(Pos,Siz,To,{HM,HF},Template,Fun) when 1 bsl (Siz*8) =< 65536 -> io:format("Byte: ~p, Size: ~p~n",[Pos,Siz]), N = 1 bsl (Siz*8), gen_keys(N,Template,Pos,fun (X) -> P = HM:HF(Fun(X),N), ets:insert(?MODULE,{P}) end ), Hits = collect_hits(), io:format( "Hashing of ~p values spread over ~p buckets~n", [N,Hits]), case (N div Hits) > 2 of true -> exit({not_spread_enough, Hits, on, N}); _ -> test_fun_1(Pos + Siz, Siz, To,{HM,HF},Template,Fun) end; test_fun_1(_,_,_,_,_,_) -> ok. init_table() -> (catch ets:delete(?MODULE)), ets:new(?MODULE,[ordered_set,named_table]). collect_hits() -> N = ets:info(?MODULE,size), init_table(), N. integer_to_binary_value(N) -> list_to_binary(lists:reverse(integer_to_bytelist(N,[]))). integer_to_bytelist(0,Acc) -> Acc; integer_to_bytelist(N,Acc) -> integer_to_bytelist(N bsr 8, [N band 16#FF | Acc]). unaligned_sub_bin(Bin0) when is_binary(Bin0) -> Bin1 = <<42:6,Bin0/binary,3:2>>, Sz = size(Bin0), <<42:6,Bin:Sz/binary,3:2>> = id(Bin1), Bin. unaligned_sub_bitstr(Bin0) when is_bitstring(Bin0) -> Bin1 = <<(-1):4,Bin0/bits,(-1):64>>, Bits = bit_size(Bin0), <<_:4,Bin:Bits/bits,_:64>> = id(Bin1), Bin. id(I) -> I. %% Benchmarks for phash2 run_phash2_benchmarks() -> Benchmarks = [ test_phash2_large_map, test_phash2_shallow_long_list, test_phash2_deep_list, test_phash2_deep_tuple, test_phash2_deep_tiny, test_phash2_with_42, test_phash2_with_short_tuple, test_phash2_with_short_list, test_phash2_with_tiny_bin, test_phash2_with_tiny_unaligned_sub_binary, test_phash2_with_small_unaligned_sub_binary, test_phash2_with_large_bin, test_phash2_with_large_unaligned_sub_binary, test_phash2_with_super_large_unaligned_sub_binary ], [print_comment(B) || B <- Benchmarks]. print_comment(FunctionName) -> io:format("~p~n", [FunctionName]), io:format("~s~n", [element(2, erlang:apply(?MODULE, FunctionName, [[]]))]). nr_of_iters(BenchmarkNumberOfIterations, Config) -> case lists:member(phash2_benchmark_tests, Config) of true -> 1; false -> BenchmarkNumberOfIterations end. test_phash2_large_map(Config) when is_list(Config) -> {Size, ExpectedHash} = case {total_memory(), erlang:system_info(wordsize)} of {Mem, 8} when is_integer(Mem) andalso Mem > 2 -> {1000000, 121857429}; _ -> {1000, 66609305} end, run_phash2_test_and_benchmark(nr_of_iters(45, Config), get_map(Size), ExpectedHash). test_phash2_shallow_long_list(Config) when is_list(Config) -> {Size, ExpectedHash} = case {total_memory(), erlang:system_info(wordsize)} of {Mem, 8} when is_integer(Mem) andalso Mem > 2 -> {1000000, 78700388}; _ -> {1000, 54749638} end, run_phash2_test_and_benchmark(nr_of_iters(1, Config), lists:duplicate(Size, get_complex_tuple()), ExpectedHash). test_phash2_deep_list(Config) when is_list(Config) -> {Size, ExpectedHash} = case {total_memory(), erlang:system_info(wordsize)} of {Mem, 8} when is_integer(Mem) andalso Mem > 2 -> {500000, 17986444}; _ -> {1000, 81794308} end, run_phash2_test_and_benchmark(nr_of_iters(1, Config), make_deep_list(Size, get_complex_tuple()), ExpectedHash). test_phash2_deep_tuple(Config) when is_list(Config) -> {Size, ExpectedHash} = case {total_memory(), erlang:system_info(wordsize)} of {Mem, 8} when is_integer(Mem) andalso Mem > 2 -> {500000, 116594715}; _ -> {500, 109057352} end, run_phash2_test_and_benchmark(nr_of_iters(1, Config), make_deep_tuple(Size, get_complex_tuple()), ExpectedHash). test_phash2_deep_tiny(Config) when is_list(Config) -> run_phash2_test_and_benchmark(nr_of_iters(1000000, Config), make_deep_list(19, 42), 111589624). test_phash2_with_42(Config) when is_list(Config) -> run_phash2_test_and_benchmark(nr_of_iters(20000000, Config), 42, 30328728). test_phash2_with_short_tuple(Config) when is_list(Config) -> run_phash2_test_and_benchmark(nr_of_iters(10000000, Config), {a,b,<<"hej">>, "hej"}, 50727199). test_phash2_with_short_list(Config) when is_list(Config) -> run_phash2_test_and_benchmark(nr_of_iters(10000000, Config), [a,b,"hej", "hello"], 117108642). test_phash2_with_tiny_bin(Config) when is_list(Config) -> run_phash2_test_and_benchmark(nr_of_iters(20000000, Config), make_random_bin(10), 129616602). test_phash2_with_tiny_unaligned_sub_binary(Config) when is_list(Config) -> run_phash2_test_and_benchmark(nr_of_iters(10000000, Config), make_unaligned_sub_binary(make_random_bin(11)), 59364725). test_phash2_with_small_unaligned_sub_binary(Config) when is_list(Config) -> run_phash2_test_and_benchmark(nr_of_iters(400000, Config), make_unaligned_sub_binary(make_random_bin(1001)), 130388119). test_phash2_with_large_bin(Config) when is_list(Config) -> {Size, ExpectedHash} = case {total_memory(), erlang:system_info(wordsize)} of {Mem, 8} when is_integer(Mem) andalso Mem > 2 -> {10000000, 48249379}; _ -> {1042, 14679520} end, run_phash2_test_and_benchmark(nr_of_iters(150, Config), make_random_bin(Size), ExpectedHash). test_phash2_with_large_unaligned_sub_binary(Config) when is_list(Config) -> {Size, ExpectedHash} = case {total_memory(), erlang:system_info(wordsize)} of {Mem, 8} when is_integer(Mem) andalso Mem > 2 -> {10000001, 122836437}; _ -> {10042, 127144287} end, run_phash2_test_and_benchmark(nr_of_iters(50, Config), make_unaligned_sub_binary(make_random_bin(Size)), ExpectedHash). test_phash2_with_super_large_unaligned_sub_binary(Config) when is_list(Config) -> {Size, ExpectedHash} = case {total_memory(), erlang:system_info(wordsize)} of {Mem, 8} when is_integer(Mem) andalso Mem > 2 -> {20000001, 112086727}; _ -> {20042, 91996619} end, run_phash2_test_and_benchmark(nr_of_iters(20, Config), make_unaligned_sub_binary(make_random_bin(Size)), ExpectedHash). make_deep_list(1, Item) -> {Item, Item}; make_deep_list(Depth, Item) -> [{Item, Item}, make_deep_list(Depth - 1, Item)]. make_deep_tuple(1, Item) -> [Item, Item]; make_deep_tuple(Depth, Item) -> {[Item, Item], make_deep_tuple(Depth - 1, Item)}. % Helper functions for benchmarking loop(0, _) -> ok; loop(Iterations, Fun) -> Fun(), loop(Iterations - 1, Fun). run_phash2_test_and_benchmark(Iterations, Term, ExpectedHash) -> Parent = self(), Test = fun() -> Hash = erlang:phash2(Term), case ExpectedHash =:= Hash of false -> Parent ! {got_bad_hash, Hash}, ExpectedHash = Hash; _ -> ok end end, Benchmark = fun() -> garbage_collect(), {Time, _} =timer:tc(fun() -> loop(Iterations, Test) end), Parent ! Time end, spawn(Benchmark), receive {got_bad_hash, Hash} -> ExpectedHash = Hash; Time -> TimeInS = case (Time/1000000) of 0.0 -> 0.0000000001; T -> T end, IterationsPerSecond = Iterations / TimeInS, notify(#event{ name = benchmark_data, data = [{value, IterationsPerSecond}]}), {comment, io_lib:format("Iterations per second: ~p, Iterations ~p, Benchmark time: ~p seconds)", [IterationsPerSecond, Iterations, Time/1000000])} end. get_complex_tuple() -> BPort = <<131,102,100,0,13,110,111,110,111,100,101,64,110,111,104, 111,115,116,0,0,0,1,0>>, Port = binary_to_term(BPort), BXPort = <<131,102,100,0,11,97,112,97,64,108,101,103,111,108,97,115, 0,0,0,24,3>>, XPort = binary_to_term(BXPort), BRef = <<131,114,0,3,100,0,13,110,111,110,111,100,101,64,110,111,104, 111,115,116,0,0,0,1,255,0,0,0,0,0,0,0,0>>, Ref = binary_to_term(BRef), BXRef = <<131,114,0,3,100,0,11,97,112,97,64,108,101,103,111,108,97,115, 2,0,0,0,155,0,0,0,0,0,0,0,0>>, XRef = binary_to_term(BXRef), BXPid = <<131,103,100,0,11,97,112,97,64,108,101,103,111,108,97,115, 0,0,0,36,0,0,0,0,1>>, XPid = binary_to_term(BXPid), %% X = f1(), Y = f2(), Z = f3(X, Y), %% F1 = fun f1/0, % -> abc B1 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98, 13,196,76,242,0,0,0,1,0,0,0,0,100,0,1,116,97,1,98,2,195,126, 58,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111, 115,116,0,0,0,112,0,0,0,0,0>>, F1 = binary_to_term(B1), %% F2 = fun f2/0, % -> abd B2 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98, 13,196,76,242,0,0,0,2,0,0,0,0,100,0,1,116,97,2,98,3,130,152, 185,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111, 115,116,0,0,0,112,0,0,0,0,0>>, F2 = binary_to_term(B2), %% F3 = fun f3/2, % -> {abc, abd} B3 = <<131,112,0,0,0,66,2,215,206,77,69,249,50,170,17,129,47,21,98, 13,196,76,242,0,0,0,3,0,0,0,0,100,0,1,116,97,3,98,7,168,160, 93,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111, 115,116,0,0,0,112,0,0,0,0,0>>, F3 = binary_to_term(B3), %% F4 = fun () -> 123456789012345678901234567 end, B4 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98, 13,196,76,242,0,0,0,4,0,0,0,0,100,0,1,116,97,4,98,2,230,21, 171,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111, 115,116,0,0,0,112,0,0,0,0,0>>, F4 = binary_to_term(B4), %% F5 = fun() -> {X,Y,Z} end, B5 = <<131,112,0,0,0,92,0,215,206,77,69,249,50,170,17,129,47,21,98, 13,196,76,242,0,0,0,5,0,0,0,3,100,0,1,116,97,5,98,0,99,101, 130,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111, 115,116,0,0,0,112,0,0,0,0,0,100,0,3,97,98,99,100,0,3,97,98, 100,104,2,100,0,3,97,98,99,100,0,3,97,98,100>>, F5 = binary_to_term(B5), {{1,{2}},an_atom, 1, 3434.923942394,<<"this is a binary">>, make_unaligned_sub_binary(<<"this is also a binary">>),c,d,e,f,g,h,i,j,k,l,[f], 999999999999999999666666662123123123123324234999999999999999, 234234234, BPort, Port, BXPort, XPort, BRef, Ref, BXRef, XRef, BXPid, XPid, F1, F2, F3, F4, F5, #{a => 1, b => 2, c => 3, d => 4, e => 5, f => 6, g => 7, h => 8, i => 9, j => 1, k => 1, l => 123123123123213, m => [1,2,3,4,5,6,7,8], o => 5, p => 6, q => 7, r => 8, s => 9}}. get_map_helper(MapSoFar, 0) -> MapSoFar; get_map_helper(MapSoFar, NumOfItemsToAdd) -> NewMapSoFar = maps:put(NumOfItemsToAdd, NumOfItemsToAdd, MapSoFar), get_map_helper(NewMapSoFar, NumOfItemsToAdd -1). get_map(Size) -> get_map_helper(#{}, Size). %% Copied from binary_SUITE make_unaligned_sub_binary(Bin0) when is_binary(Bin0) -> Bin1 = <<0:3,Bin0/binary,31:5>>, Sz = size(Bin0), <<0:3,Bin:Sz/binary,31:5>> = id(Bin1), Bin. make_unaligned_sub_bitstring(Bin0) -> Bin1 = <<0:3,Bin0/bitstring,31:5>>, Sz = erlang:bit_size(Bin0), <<0:3,Bin:Sz/bitstring,31:5>> = id(Bin1), Bin. make_random_bin(Size) -> make_random_bin(Size, []). make_random_bin(0, Acc) -> iolist_to_binary(Acc); make_random_bin(Size, []) -> make_random_bin(Size - 1, [simple_rand() rem 256]); make_random_bin(Size, [N | Tail]) -> make_random_bin(Size - 1, [simple_rand(N) rem 256, N |Tail]). simple_rand() -> 123456789. simple_rand(Seed) -> A = 1103515245, C = 12345, M = (1 bsl 31), (A * Seed + C) rem M.