diff options
author | Sverker Eriksson <[email protected]> | 2017-04-28 14:54:42 +0200 |
---|---|---|
committer | GitHub <[email protected]> | 2017-04-28 14:54:42 +0200 |
commit | 61e55780e2800e340e8ff16b5414f08373f89ef3 (patch) | |
tree | c584727c8695ac4c72c4fae9deff18931a8c54d4 /erts | |
parent | 2e2526b58f74c6c3209b3feca34866772be65335 (diff) | |
parent | e40ec046a2a1037b2f87b657503c5f21c5de4e2a (diff) | |
download | otp-61e55780e2800e340e8ff16b5414f08373f89ef3.tar.gz otp-61e55780e2800e340e8ff16b5414f08373f89ef3.tar.bz2 otp-61e55780e2800e340e8ff16b5414f08373f89ef3.zip |
Merge PR1413 from g-andrade/feature/phash2_nif
Support hashing terms from NIF code
Diffstat (limited to 'erts')
-rw-r--r-- | erts/doc/src/erl_nif.xml | 36 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_info.c | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_db_hash.c | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_map.h | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_nif.c | 17 | ||||
-rw-r--r-- | erts/emulator/beam/erl_nif.h | 5 | ||||
-rw-r--r-- | erts/emulator/beam/erl_nif_api_funcs.h | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process_dict.c | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_trace.c | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_utils.h | 2 | ||||
-rw-r--r-- | erts/emulator/beam/utils.c | 10 | ||||
-rw-r--r-- | erts/emulator/test/nif_SUITE.erl | 159 | ||||
-rw-r--r-- | erts/emulator/test/nif_SUITE_data/nif_SUITE.c | 26 |
13 files changed, 254 insertions, 13 deletions
diff --git a/erts/doc/src/erl_nif.xml b/erts/doc/src/erl_nif.xml index 6bb1109415..05b519fe7d 100644 --- a/erts/doc/src/erl_nif.xml +++ b/erts/doc/src/erl_nif.xml @@ -813,6 +813,29 @@ typedef enum { </item> </taglist> </item> + <tag><marker id="ErlNifHash"/><c>ErlNifHash</c></tag> + <item> + <p>An enumeration of the supported hash types that can be generated + using <seealso marker="#enif_hash"><c>enif_hash</c></seealso>. + </p> + <taglist> + <tag><c>ERL_NIF_INTERNAL_HASH</c></tag> + <item> + <p>Non-portable hash function that only guarantees the same hash + for the same term within one Erlang VM instance.</p> + <p>It takes 32-bit salt values and generates hashes within <c>0..2^32-1</c>.</p> + </item> + <tag><c>ERL_NIF_PHASH2</c></tag> + <item> + <p>Portable hash function that gives the same hash for the + same Erlang term regardless of machine architecture and ERTS version.</p> + <p><em>It ignores salt values</em> and generates hashes within <c>0..2^27-1</c>.</p> + <p>Slower than <c>ERL_NIF_INTERNAL_HASH</c>. + It corresponds to <seealso marker="erlang#phash2-1"><c>erlang:phash2/1</c></seealso>. 
+ </p> + </item> + </taglist> + </item> </taglist> </section> @@ -1387,6 +1410,19 @@ typedef enum { </func> <func> + <name> + <ret>ErlNifUInt64</ret> + <nametext>enif_hash(ErlNifHash type, ERL_NIF_TERM term, ErlNifUInt64 salt)</nametext> + </name> + <fsummary>Hash terms.</fsummary> + <desc> + <p>Hashes <c>term</c> according to the specified + <seealso marker="#ErlNifHash"><c>ErlNifHash</c></seealso> <c>type</c>.</p> + <p>Ranges of taken salt (if any) and returned value depend on the hash type.</p> + </desc> + </func> + + <func> <name><ret>int</ret><nametext>enif_inspect_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, ErlNifBinary* bin)</nametext></name> <fsummary>Inspect the content of a binary.</fsummary> diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 3a70c6036b..1a680b127c 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -3950,7 +3950,7 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1) BIF_RET(erts_debug_reader_groups_map(BIF_P, (int) groups)); } else if (ERTS_IS_ATOM_STR("internal_hash", tp[1])) { - Uint hash = (Uint) make_internal_hash(tp[2]); + Uint hash = (Uint) make_internal_hash(tp[2], 0); Uint hsz = 0; Eterm* hp; erts_bld_uint(NULL, &hsz, hash); diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c index 80c4824eeb..9009c00833 100644 --- a/erts/emulator/beam/erl_db_hash.c +++ b/erts/emulator/beam/erl_db_hash.c @@ -189,7 +189,7 @@ static ERTS_INLINE int add_fixed_deletion(DbTableHash* tb, int ix, /* optimised version of make_hash (normal case? atomic key) */ #define MAKE_HASH(term) \ ((is_atom(term) ? 
(atom_tab(atom_val(term))->slot.bucket.hvalue) : \ - make_internal_hash(term)) % MAX_HASH) + make_internal_hash(term, 0)) % MAX_HASH) #ifdef ERTS_SMP # define DB_HASH_LOCK_MASK (DB_HASH_LOCK_CNT-1) diff --git a/erts/emulator/beam/erl_map.h b/erts/emulator/beam/erl_map.h index 61a841f7f0..f7d0413685 100644 --- a/erts/emulator/beam/erl_map.h +++ b/erts/emulator/beam/erl_map.h @@ -57,7 +57,7 @@ typedef struct flatmap_s { #define hashmap_size(x) (((hashmap_head_t*) hashmap_val(x))->size) -#define hashmap_make_hash(Key) make_internal_hash(Key) +#define hashmap_make_hash(Key) make_internal_hash(Key, 0) #define hashmap_restore_hash(Heap,Lvl,Key) \ (((Lvl) < 8) ? hashmap_make_hash(Key) >> (4*(Lvl)) : hashmap_make_hash(CONS(Heap, make_small((Lvl)>>3), (Key))) >> (4*((Lvl) & 7))) diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index 872b58d1ef..e041fd7b83 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -55,6 +55,7 @@ #include "dtrace-wrapper.h" #include "erl_process.h" #include "erl_bif_unique.h" +#include "erl_utils.h" #undef ERTS_WANT_NFUNC_SCHED_INTERNALS__ #define ERTS_WANT_NFUNC_SCHED_INTERNALS__ #include "erl_nfunc_sched.h" @@ -1210,6 +1211,22 @@ int enif_compare(Eterm lhs, Eterm rhs) return result; } +ErlNifUInt64 enif_hash(ErlNifHash type, Eterm term, ErlNifUInt64 salt) +{ + switch (type) { + case ERL_NIF_INTERNAL_HASH: + return make_internal_hash(term, (Uint32) salt); + case ERL_NIF_PHASH2: + /* It appears that make_hash2 doesn't always react to seasoning + * as well as it should. Therefore, let's make it ignore the salt + * value and declare salted uses of phash2 as unsupported. 
+ */ + return make_hash2(term) & ((1 << 27) - 1); + default: + return 0; + } +} + int enif_get_tuple(ErlNifEnv* env, Eterm tpl, int* arity, const Eterm** array) { Eterm* ptr; diff --git a/erts/emulator/beam/erl_nif.h b/erts/emulator/beam/erl_nif.h index ac45f3ac81..5a81f5fbbb 100644 --- a/erts/emulator/beam/erl_nif.h +++ b/erts/emulator/beam/erl_nif.h @@ -236,6 +236,11 @@ typedef enum { ERL_NIF_BIN2TERM_SAFE = 0x20000000 } ErlNifBinaryToTerm; +typedef enum { + ERL_NIF_INTERNAL_HASH = 1, + ERL_NIF_PHASH2 = 2 +} ErlNifHash; + /* * Return values from enif_thread_type(). Negative values * reserved for specific types of non-scheduler threads. diff --git a/erts/emulator/beam/erl_nif_api_funcs.h b/erts/emulator/beam/erl_nif_api_funcs.h index 01d9e386ed..b34058f303 100644 --- a/erts/emulator/beam/erl_nif_api_funcs.h +++ b/erts/emulator/beam/erl_nif_api_funcs.h @@ -180,6 +180,7 @@ ERL_NIF_API_FUNC_DECL(ErlNifResourceType*,enif_open_resource_type_x,(ErlNifEnv*, ERL_NIF_API_FUNC_DECL(int, enif_monitor_process,(ErlNifEnv*,void* obj,const ErlNifPid*,ErlDrvMonitor *monitor)); ERL_NIF_API_FUNC_DECL(int, enif_demonitor_process,(ErlNifEnv*,void* obj,const ErlDrvMonitor *monitor)); ERL_NIF_API_FUNC_DECL(int, enif_compare_monitors,(const ErlNifMonitor*,const ErlNifMonitor*)); +ERL_NIF_API_FUNC_DECL(ErlNifUInt64,enif_hash,(ErlNifHash type, ERL_NIF_TERM term, ErlNifUInt64 salt)); /* ** ADD NEW ENTRIES HERE (before this comment) !!! 
@@ -342,6 +343,7 @@ ERL_NIF_API_FUNC_DECL(int, enif_compare_monitors,(const ErlNifMonitor*,const Erl # define enif_monitor_process ERL_NIF_API_FUNC_MACRO(enif_monitor_process) # define enif_demonitor_process ERL_NIF_API_FUNC_MACRO(enif_demonitor_process) # define enif_compare_monitors ERL_NIF_API_FUNC_MACRO(enif_compare_monitors) +# define enif_hash ERL_NIF_API_FUNC_MACRO(enif_hash) /* ** ADD NEW ENTRIES HERE (before this comment) diff --git a/erts/emulator/beam/erl_process_dict.c b/erts/emulator/beam/erl_process_dict.c index 7cfdf20341..01e240c65d 100644 --- a/erts/emulator/beam/erl_process_dict.c +++ b/erts/emulator/beam/erl_process_dict.c @@ -57,7 +57,7 @@ ((is_small(Term)) ? (Uint32) unsigned_val(Term) : \ ((is_atom(Term)) ? \ (Uint32) atom_val(Term) : \ - make_internal_hash(Term))) + make_internal_hash(Term, 0))) #define PD_SZ2BYTES(Sz) (sizeof(ProcDict) + ((Sz) - 1)*sizeof(Eterm)) diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c index 870f1f142d..a5fc3a2477 100644 --- a/erts/emulator/beam/erl_trace.c +++ b/erts/emulator/beam/erl_trace.c @@ -3229,7 +3229,7 @@ static int tracer_cmp_fun(void* a, void* b) static HashValue tracer_hash_fun(void* obj) { - return make_internal_hash(((ErtsTracerNif*)obj)->module); + return make_internal_hash(((ErtsTracerNif*)obj)->module, 0); } static void *tracer_alloc_fun(void* tmpl) diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h index 47289a0af1..75d7e47239 100644 --- a/erts/emulator/beam/erl_utils.h +++ b/erts/emulator/beam/erl_utils.h @@ -120,7 +120,7 @@ int erts_is_builtin(Eterm, Eterm, int); Uint32 block_hash(byte *, unsigned, Uint32); Uint32 make_hash2(Eterm); Uint32 make_hash(Eterm); -Uint32 make_internal_hash(Eterm); +Uint32 make_internal_hash(Eterm, Uint32 salt); void erts_save_emu_args(int argc, char **argv); Eterm erts_get_emu_args(struct process *c_p); diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index cdf766b206..96a7bfe8ac 100644 --- 
a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -1578,13 +1578,13 @@ do { /* Lightweight mixing of constant (type info) */ \ } while (0) Uint32 -make_internal_hash(Eterm term) +make_internal_hash(Eterm term, Uint32 salt) { Uint32 hash; /* Optimization. Simple cases before declaration of estack. */ if (primary_tag(term) == TAG_PRIMARY_IMMED1) { - hash = 0; + hash = salt; #if ERTS_SIZEOF_ETERM == 8 UINT32_HASH_2((Uint32)term, (Uint32)(term >> 32), HCONST); #elif ERTS_SIZEOF_ETERM == 4 @@ -1598,7 +1598,7 @@ make_internal_hash(Eterm term) Eterm tmp; DECLARE_ESTACK(s); - hash = 0; + hash = salt; for (;;) { switch (primary_tag(term)) { case TAG_PRIMARY_LIST: @@ -1874,7 +1874,7 @@ make_internal_hash(Eterm term) goto pop_next; } default: - erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_hash2(0x%X)\n", term); + erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_internal_hash(0x%X, %lu)\n", term, salt); } } break; @@ -1887,7 +1887,7 @@ make_internal_hash(Eterm term) goto pop_next; default: - erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_hash2(0x%X)\n", term); + erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_internal_hash(0x%X, %lu)\n", term, salt); pop_next: if (ESTACK_ISEMPTY(s)) { diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl index 693db42e58..8ad11d3bf3 100644 --- a/erts/emulator/test/nif_SUITE.erl +++ b/erts/emulator/test/nif_SUITE.erl @@ -56,7 +56,10 @@ nif_is_process_alive/1, nif_is_port_alive/1, nif_term_to_binary/1, nif_binary_to_term/1, nif_port_command/1, - nif_snprintf/1 + nif_snprintf/1, + nif_internal_hash/1, + nif_internal_hash_salted/1, + nif_phash2/1 ]). -export([many_args_100/100]). @@ -90,7 +93,10 @@ all() -> nif_is_process_alive, nif_is_port_alive, nif_term_to_binary, nif_binary_to_term, nif_port_command, - nif_snprintf]. + nif_snprintf, + nif_internal_hash, + nif_internal_hash_salted, + nif_phash2]. 
groups() -> [{G, [], api_repeaters()} || G <- api_groups()] @@ -2610,6 +2616,154 @@ nif_snprintf(Config) -> <<"{{hello,world,-33},",0>> = format_term_nif(20,{{hello,world, -33}, 3.14, self()}), ok. +nif_internal_hash(Config) -> + ensure_lib_loaded(Config), + HashValueBitSize = nif_hash_result_bitsize(internal), + Terms = unique([random_term() || _ <- lists:seq(1, 5000)]), + HashValues = [hash_nif(internal, Term, 0) || Term <- Terms], + test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05). + +nif_internal_hash_salted(Config) -> + ensure_lib_loaded(Config), + test_salted_nif_hash(internal). + +nif_phash2(Config) -> + ensure_lib_loaded(Config), + HashValueBitSize = nif_hash_result_bitsize(phash2), + Terms = unique([random_term() || _ <- lists:seq(1, 5000)]), + HashValues = + lists:map( + fun (Term) -> + HashValue = erlang:phash2(Term), + Salt = random_uint32(), % phash2 should ignore salt + NifHashValue = hash_nif(phash2, Term, Salt), + (HashValue =:= NifHashValue + orelse ct:fail("Expected: ~p\nActual: ~p", + [HashValue, NifHashValue])), + HashValue + end, + Terms), + test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05). 
+ +test_salted_nif_hash(HashType) -> + HashValueBitSize = nif_hash_result_bitsize(HashType), + Terms = unique([random_term() || _ <- lists:seq(1, 5000)]), + Salts = unique([random_uint32() || _ <- lists:seq(1, 100)]), + {HashValuesPerSalt, HashValuesPerTerm} = + lists:mapfoldl( + fun (Salt, Acc) -> + {HashValues, NewAcc} = + lists:mapfoldl( + fun (Term, AccB) -> + HashValue = hash_nif(HashType, Term, Salt), + NewAccB = dict:append(Term, HashValue, AccB), + {HashValue, NewAccB} + end, + Acc, + Terms), + {{Salt, HashValues}, NewAcc} + end, + dict:new(), + Salts), + + % Test per-salt hash distribution of different terms + lists:foreach( + fun ({_Salt, HashValues}) -> + test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05) + end, + HashValuesPerSalt), + + % Test per-term hash distribution of different salts + dict:fold( + fun (_Term, HashValues, Acc) -> + % Be more tolerant of relative deviation, + % as there's fewer hash values here. + test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.30), + Acc + end, + ok, + HashValuesPerTerm). + +test_bit_distribution_fitness(Integers, BitSize, MaxRelativeDeviation) -> + MaxInteger = (1 bsl BitSize) - 1, + OnesPerBit = + lists:foldl( + fun (Integer, Acc) when Integer >= 0, Integer =< MaxInteger -> + lists:foldl( + fun (BitIndex, AccB) -> + BitValue = (Integer band (1 bsl BitIndex)) bsr BitIndex, + orddict:update_counter(BitIndex, BitValue, AccB) + end, + Acc, + lists:seq(0, BitSize - 1)) + end, + orddict:new(), + Integers), + + ExpectedNrOfOnes = length(Integers) div 2, + FailureText = + orddict:fold( + fun (BitIndex, NrOfOnes, Acc) -> + RelativeDeviation = abs(NrOfOnes - ExpectedNrOfOnes) / length(Integers), + case RelativeDeviation >= MaxRelativeDeviation of + false -> Acc; + true -> + [Acc, + io_lib:format( + "Unreasonable deviation on number of set bits (i=~p): " + "expected ~p, got ~p (relative dev. 
~.3f)~n", + [BitIndex, ExpectedNrOfOnes, NrOfOnes, RelativeDeviation])] + end + end, + [], + OnesPerBit), + + (FailureText =:= [] orelse ct:fail(FailureText)). + +nif_hash_result_bitsize(internal) -> 32; +nif_hash_result_bitsize(phash2) -> 27. + +unique(List) -> + lists:usort(List). + +random_uint32() -> + rand:uniform(1 bsl 32) - 1. + +random_term() -> + case rand:uniform(6) of + 1 -> rand:uniform(1 bsl 27) - 1; % small + 2 -> (1 bsl 27) + rand:uniform(1 bsl 128); % big + 3 -> random_sign() * (rand:uniform() * (1 bsl 53)); % float + 4 -> random_binary(); + 5 -> random_pid(); + 6 -> + Length = rand:uniform(10), + List = [random_term() || _ <- lists:seq(1, Length)], + case rand:uniform(2) of + 1 -> + List; + 2 -> + list_to_tuple(List) + end + end. + +random_sign() -> + case rand:uniform(2) of + 1 -> -1.0; + 2 -> 1.0 + end. + +random_binary() -> + list_to_binary(random_bytes(rand:uniform(32) - 1)). + +random_bytes(0) -> + []; +random_bytes(N) when N > 0 -> + [rand:uniform(256) - 1 | random_bytes(N - 1)]. + +random_pid() -> + Processes = erlang:processes(), + lists:nth(rand:uniform(length(Processes)), Processes). %% The NIFs: lib_version() -> undefined. @@ -2621,6 +2775,7 @@ type_test() -> ?nif_stub. tuple_2_list(_) -> ?nif_stub. is_identical(_,_) -> ?nif_stub. compare(_,_) -> ?nif_stub. +hash_nif(_Type, _Term, _Salt) -> ?nif_stub. many_args_100(_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_) -> ?nif_stub. clone_bin(_) -> ?nif_stub. make_sub_bin(_,_,_) -> ?nif_stub. 
diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c index 8fe5ee809a..a255c9f096 100644 --- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c +++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c @@ -687,6 +687,31 @@ static ERL_NIF_TERM compare(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) return enif_make_int(env, enif_compare(argv[0],argv[1])); } +static ERL_NIF_TERM hash_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + if (argc != 3) { + return enif_make_badarg(env); + } + + ErlNifHash type; + if (enif_is_identical(argv[0], enif_make_atom(env, "internal"))) { + type = ERL_NIF_INTERNAL_HASH; + } + else if (enif_is_identical(argv[0], enif_make_atom(env, "phash2"))) { + type = ERL_NIF_PHASH2; + } + else { + return enif_make_badarg(env); + } + + ErlNifUInt64 salt; + if (! enif_get_uint64(env, argv[2], &salt)) { + return enif_make_badarg(env); + } + + return enif_make_uint64(env, enif_hash(type, argv[1], salt)); +} + static ERL_NIF_TERM many_args_100(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { int i, k; @@ -2864,6 +2889,7 @@ static ErlNifFunc nif_funcs[] = {"tuple_2_list", 1, tuple_2_list}, {"is_identical",2,is_identical}, {"compare",2,compare}, + {"hash_nif",3,hash_nif}, {"many_args_100", 100, many_args_100}, {"clone_bin", 1, clone_bin}, {"make_sub_bin", 3, make_sub_bin}, |