From d8756f8665a42effa2f3515369058cff4441abeb Mon Sep 17 00:00:00 2001 From: Guilherme Andrade Date: Thu, 20 Apr 2017 22:44:04 +0100 Subject: erts: Refactor enif_phash2 into enif_hash A more generic hashing function which can also hash terms based on `make_internal_hash'. --- erts/doc/src/erl_nif.xml | 49 +++++++++++++++++++-------- erts/emulator/beam/erl_nif.c | 11 ++++-- erts/emulator/beam/erl_nif.h | 5 +++ erts/emulator/beam/erl_nif_api_funcs.h | 4 +-- erts/emulator/test/nif_SUITE.erl | 47 +++++++++++++++++++++++-- erts/emulator/test/nif_SUITE_data/nif_SUITE.c | 20 ++++++++--- 6 files changed, 111 insertions(+), 25 deletions(-) diff --git a/erts/doc/src/erl_nif.xml b/erts/doc/src/erl_nif.xml index 35c42ac517..53b6fdeaa1 100644 --- a/erts/doc/src/erl_nif.xml +++ b/erts/doc/src/erl_nif.xml @@ -813,6 +813,28 @@ typedef enum { + ErlNifHash + +

An enumeration of the supported hash types that can be generated + using enif_hash. +

+ + ERL_NIF_INTERNAL_HASH + +

Non-portable hash function that gives different hashes for the same Erlang + term depending on machine architecture and ERTS version.

+

Generated hash values within 0..2^32-1.

+
+ ERL_NIF_PHASH2 + +

Portable hash function that gives the same hash for the same Erlang term + regardless of machine architecture and ERTS version. Corresponds to + erlang:phash2/1.

+

Slower than ERL_NIF_INTERNAL_HASH.

+

Generated hash values within 0..2^27-1.

+
+
+
@@ -1386,6 +1408,19 @@ typedef enum { + + + unsigned long + enif_hash(ErlNifHash type, ERL_NIF_TERM term) + + Hash terms. + +

Hash terms according to the specified + ErlNifHash type.

+

Range of returned value depends on the hash type.

+
+
+ intenif_inspect_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, ErlNifBinary* bin) @@ -2369,20 +2404,6 @@ enif_map_iterator_destroy(env, &iter); - - - unsigned long - enif_phash2(ERL_NIF_TERM term) - - Portable hash function. - -

Portable hash function that gives the same hash for the same Erlang term - regardless of machine architecture and ERTS version. Corresponds to - erlang:phash2/1.

-

Returns an unsigned integer within 0..2^27-1.

-
-
- intenif_port_command(ErlNifEnv* env, const ErlNifPort* to_port, ErlNifEnv *msg_env, ERL_NIF_TERM msg) diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index f7777c8b92..3470cf3fd5 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -1214,13 +1214,20 @@ int enif_compare(Eterm lhs, Eterm rhs) return result; } -unsigned long enif_phash2(Eterm term) +unsigned long enif_hash(ErlNifHash type, Eterm term) { #if SIZEOF_LONG < 4 /* This *really* shouldn't happen */ # error Incompatible long word size #endif - return make_hash2(term) & ((1 << 27) - 1); + switch (type) { + case ERL_NIF_INTERNAL_HASH: + return make_internal_hash(term); + case ERL_NIF_PHASH2: + return make_hash2(term) & ((1 << 27) - 1); + default: + return 0; + } } int enif_get_tuple(ErlNifEnv* env, Eterm tpl, int* arity, const Eterm** array) diff --git a/erts/emulator/beam/erl_nif.h b/erts/emulator/beam/erl_nif.h index ac45f3ac81..5a81f5fbbb 100644 --- a/erts/emulator/beam/erl_nif.h +++ b/erts/emulator/beam/erl_nif.h @@ -236,6 +236,11 @@ typedef enum { ERL_NIF_BIN2TERM_SAFE = 0x20000000 } ErlNifBinaryToTerm; +typedef enum { + ERL_NIF_INTERNAL_HASH = 1, + ERL_NIF_PHASH2 = 2 +} ErlNifHash; + /* * Return values from enif_thread_type(). Negative values * reserved for specific types of non-scheduler threads. diff --git a/erts/emulator/beam/erl_nif_api_funcs.h b/erts/emulator/beam/erl_nif_api_funcs.h index c096b18f91..f52c841ed7 100644 --- a/erts/emulator/beam/erl_nif_api_funcs.h +++ b/erts/emulator/beam/erl_nif_api_funcs.h @@ -47,7 +47,7 @@ ERL_NIF_API_FUNC_DECL(int,enif_get_list_cell,(ErlNifEnv* env, ERL_NIF_TERM term, ERL_NIF_API_FUNC_DECL(int,enif_get_tuple,(ErlNifEnv* env, ERL_NIF_TERM tpl, int* arity, const ERL_NIF_TERM** array)); ERL_NIF_API_FUNC_DECL(int,enif_is_identical,(ERL_NIF_TERM lhs, ERL_NIF_TERM rhs)); ERL_NIF_API_FUNC_DECL(int,enif_compare,(ERL_NIF_TERM lhs, ERL_NIF_TERM rhs)); -ERL_NIF_API_FUNC_DECL(unsigned long,enif_phash2,(ERL_NIF_TERM term)); +ERL_NIF_API_FUNC_DECL(unsigned long,enif_hash,(ErlNifHash type, ERL_NIF_TERM term)); ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_make_binary,(ErlNifEnv* env, ErlNifBinary* bin)); ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_make_badarg,(ErlNifEnv* env)); ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_make_int,(ErlNifEnv* env, int i)); @@ -209,7 +209,7 @@ ERL_NIF_API_FUNC_DECL(int, enif_compare_monitors,(const ErlNifMonitor*,const Erl # define enif_get_list_cell ERL_NIF_API_FUNC_MACRO(enif_get_list_cell) # define enif_is_identical ERL_NIF_API_FUNC_MACRO(enif_is_identical) # define enif_compare ERL_NIF_API_FUNC_MACRO(enif_compare) -# define enif_phash2 ERL_NIF_API_FUNC_MACRO(enif_phash2) +# define enif_hash ERL_NIF_API_FUNC_MACRO(enif_hash) # define enif_make_binary ERL_NIF_API_FUNC_MACRO(enif_make_binary) # define enif_make_badarg ERL_NIF_API_FUNC_MACRO(enif_make_badarg) diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl index 9127a5eae9..e177fa4963 100644 --- a/erts/emulator/test/nif_SUITE.erl +++ b/erts/emulator/test/nif_SUITE.erl @@ -57,6 +57,7 @@ nif_term_to_binary/1, nif_binary_to_term/1, nif_port_command/1, nif_snprintf/1, + nif_internal_hash/1, nif_phash2/1 ]). @@ -92,6 +93,7 @@ all() -> nif_term_to_binary, nif_binary_to_term, nif_port_command, nif_snprintf, + nif_internal_hash, nif_phash2]. groups() -> @@ -2612,15 +2614,54 @@ nif_snprintf(Config) -> <<"{{hello,world,-33},",0>> = format_term_nif(20,{{hello,world, -33}, 3.14, self()}), ok. +nif_internal_hash(Config) -> + ensure_lib_loaded(Config), + Terms = + [random_term() || _ <- lists:seq(1, 5000)], + + % Unlike the phash2 hash, in this case we + % have nothing to compare to, so let's try + % and at least make sure the distribution + % isn't outright wrong, statistical nuances + % aside. + + OnesPerBit = + lists:foldl( + fun (Term, Acc) -> + NifHashValue = hash_nif(internal, Term), + lists:foldl( + fun (BitIndex, AccB) -> + BitValue = (NifHashValue band (1 bsl BitIndex)) bsr BitIndex, + dict:update_counter(BitIndex, BitValue, AccB) + end, + Acc, + lists:seq(0, 31)) + end, + dict:new(), + Terms), + + ExpectedNrOfOnes = length(Terms) div 2, + dict:fold( + fun (BitIndex, NrOfOnes, Acc) -> + RelativeDeviation = abs(NrOfOnes - ExpectedNrOfOnes) / ExpectedNrOfOnes, + (RelativeDeviation < 0.10 + orelse ct:fail("Unreasonable deviation on number of set bits (i=~p): " + "expected ~p, got ~p (relative dev. ~.3f)", + [BitIndex, ExpectedNrOfOnes, NrOfOnes, RelativeDeviation])), + Acc + end, + ok, + OnesPerBit). + nif_phash2(Config) -> ensure_lib_loaded(Config), Terms = - [random_term() || _ <- lists:seq(1, 1000)], + [random_term() || _ <- lists:seq(1, 5000)], lists:foreach( fun (Term) -> HashValue = erlang:phash2(Term), - NifHashValue = phash2_nif(Term), + NifHashValue = hash_nif(phash2, Term), (HashValue =:= NifHashValue orelse ct:fail("Expected: ~p\nActual: ~p", [HashValue, NifHashValue])) @@ -2673,7 +2714,7 @@ type_test() -> ?nif_stub. tuple_2_list(_) -> ?nif_stub. is_identical(_,_) -> ?nif_stub. compare(_,_) -> ?nif_stub. -phash2_nif(_) -> ?nif_stub. +hash_nif(_, _) -> ?nif_stub. many_args_100(_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_) -> ?nif_stub. clone_bin(_) -> ?nif_stub. make_sub_bin(_,_,_) -> ?nif_stub. diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c index dac6d02e9d..9d65b9c09b 100644 --- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c +++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c @@ -687,12 +687,24 @@ static ERL_NIF_TERM compare(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) return enif_make_int(env, enif_compare(argv[0],argv[1])); } -static ERL_NIF_TERM phash2_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +static ERL_NIF_TERM hash_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { - if (argc != 1) { + if (argc != 2) { + return enif_make_badarg(env); + } + + ErlNifHash type; + if (enif_is_identical(argv[0], enif_make_atom(env, "internal"))) { + type = ERL_NIF_INTERNAL_HASH; + } + else if (enif_is_identical(argv[0], enif_make_atom(env, "phash2"))) { + type = ERL_NIF_PHASH2; + } + else { return enif_make_badarg(env); } - return enif_make_ulong(env, enif_phash2(argv[0])); + + return enif_make_ulong(env, enif_hash(type, argv[1])); } static ERL_NIF_TERM many_args_100(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) @@ -2872,7 +2884,7 @@ static ErlNifFunc nif_funcs[] = {"tuple_2_list", 1, tuple_2_list}, {"is_identical",2,is_identical}, {"compare",2,compare}, - {"phash2_nif",1,phash2_nif}, + {"hash_nif",2,hash_nif}, {"many_args_100", 100, many_args_100}, {"clone_bin", 1, clone_bin}, {"make_sub_bin", 3, make_sub_bin}, -- cgit v1.2.3