aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2017-04-28 14:54:42 +0200
committerGitHub <[email protected]>2017-04-28 14:54:42 +0200
commit61e55780e2800e340e8ff16b5414f08373f89ef3 (patch)
treec584727c8695ac4c72c4fae9deff18931a8c54d4
parent2e2526b58f74c6c3209b3feca34866772be65335 (diff)
parente40ec046a2a1037b2f87b657503c5f21c5de4e2a (diff)
downloadotp-61e55780e2800e340e8ff16b5414f08373f89ef3.tar.gz
otp-61e55780e2800e340e8ff16b5414f08373f89ef3.tar.bz2
otp-61e55780e2800e340e8ff16b5414f08373f89ef3.zip
Merge PR1413 from g-andrade/feature/phash2_nif
Support hashing terms from NIF code
-rw-r--r--erts/doc/src/erl_nif.xml36
-rw-r--r--erts/emulator/beam/erl_bif_info.c2
-rw-r--r--erts/emulator/beam/erl_db_hash.c2
-rw-r--r--erts/emulator/beam/erl_map.h2
-rw-r--r--erts/emulator/beam/erl_nif.c17
-rw-r--r--erts/emulator/beam/erl_nif.h5
-rw-r--r--erts/emulator/beam/erl_nif_api_funcs.h2
-rw-r--r--erts/emulator/beam/erl_process_dict.c2
-rw-r--r--erts/emulator/beam/erl_trace.c2
-rw-r--r--erts/emulator/beam/erl_utils.h2
-rw-r--r--erts/emulator/beam/utils.c10
-rw-r--r--erts/emulator/test/nif_SUITE.erl159
-rw-r--r--erts/emulator/test/nif_SUITE_data/nif_SUITE.c26
13 files changed, 254 insertions, 13 deletions
diff --git a/erts/doc/src/erl_nif.xml b/erts/doc/src/erl_nif.xml
index 6bb1109415..05b519fe7d 100644
--- a/erts/doc/src/erl_nif.xml
+++ b/erts/doc/src/erl_nif.xml
@@ -813,6 +813,29 @@ typedef enum {
</item>
</taglist>
</item>
+ <tag><marker id="ErlNifHash"/><c>ErlNifHash</c></tag>
+ <item>
+ <p>An enumeration of the supported hash types that can be generated
+ using <seealso marker="#enif_hash"><c>enif_hash</c></seealso>.
+ </p>
+ <taglist>
+ <tag><c>ERL_NIF_INTERNAL_HASH</c></tag>
+ <item>
+ <p>Non-portable hash function that only guarantees the same hash
+ for the same term within one Erlang VM instance.</p>
+ <p>It takes 32-bit salt values and generates hashes within <c>0..2^32-1</c>.</p>
+ </item>
+ <tag><c>ERL_NIF_PHASH2</c></tag>
+ <item>
+ <p>Portable hash function that gives the same hash for the
+ same Erlang term regardless of machine architecture and ERTS version.</p>
+ <p><em>It ignores salt values</em> and generates hashes within <c>0..2^27-1</c>.</p>
+ <p>Slower than <c>ERL_NIF_INTERNAL_HASH.</c>
+ It corresponds to <seealso marker="erlang#phash2-1"><c>erlang:phash2/1</c></seealso>.
+ </p>
+ </item>
+ </taglist>
+ </item>
</taglist>
</section>
@@ -1387,6 +1410,19 @@ typedef enum {
</func>
<func>
+ <name>
+ <ret>ErlNifUInt64</ret>
+ <nametext>enif_hash(ErlNifHash type, ERL_NIF_TERM term, ErlNifUInt64 salt)</nametext>
+ </name>
+ <fsummary>Hash terms.</fsummary>
+ <desc>
+ <p>Hashes <c>term</c> according to the specified
+ <seealso marker="#ErlNifHash"><c>ErlNifHash</c></seealso> <c>type</c>.</p>
+ <p>Ranges of taken salt (if any) and returned value depend on the hash type.</p>
+ </desc>
+ </func>
+
+ <func>
<name><ret>int</ret><nametext>enif_inspect_binary(ErlNifEnv* env,
ERL_NIF_TERM bin_term, ErlNifBinary* bin)</nametext></name>
<fsummary>Inspect the content of a binary.</fsummary>
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 3a70c6036b..1a680b127c 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -3950,7 +3950,7 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1)
BIF_RET(erts_debug_reader_groups_map(BIF_P, (int) groups));
}
else if (ERTS_IS_ATOM_STR("internal_hash", tp[1])) {
- Uint hash = (Uint) make_internal_hash(tp[2]);
+ Uint hash = (Uint) make_internal_hash(tp[2], 0);
Uint hsz = 0;
Eterm* hp;
erts_bld_uint(NULL, &hsz, hash);
diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c
index 80c4824eeb..9009c00833 100644
--- a/erts/emulator/beam/erl_db_hash.c
+++ b/erts/emulator/beam/erl_db_hash.c
@@ -189,7 +189,7 @@ static ERTS_INLINE int add_fixed_deletion(DbTableHash* tb, int ix,
/* optimised version of make_hash (normal case? atomic key) */
#define MAKE_HASH(term) \
((is_atom(term) ? (atom_tab(atom_val(term))->slot.bucket.hvalue) : \
- make_internal_hash(term)) % MAX_HASH)
+ make_internal_hash(term, 0)) % MAX_HASH)
#ifdef ERTS_SMP
# define DB_HASH_LOCK_MASK (DB_HASH_LOCK_CNT-1)
diff --git a/erts/emulator/beam/erl_map.h b/erts/emulator/beam/erl_map.h
index 61a841f7f0..f7d0413685 100644
--- a/erts/emulator/beam/erl_map.h
+++ b/erts/emulator/beam/erl_map.h
@@ -57,7 +57,7 @@ typedef struct flatmap_s {
#define hashmap_size(x) (((hashmap_head_t*) hashmap_val(x))->size)
-#define hashmap_make_hash(Key) make_internal_hash(Key)
+#define hashmap_make_hash(Key) make_internal_hash(Key, 0)
#define hashmap_restore_hash(Heap,Lvl,Key) \
(((Lvl) < 8) ? hashmap_make_hash(Key) >> (4*(Lvl)) : hashmap_make_hash(CONS(Heap, make_small((Lvl)>>3), (Key))) >> (4*((Lvl) & 7)))
diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c
index 872b58d1ef..e041fd7b83 100644
--- a/erts/emulator/beam/erl_nif.c
+++ b/erts/emulator/beam/erl_nif.c
@@ -55,6 +55,7 @@
#include "dtrace-wrapper.h"
#include "erl_process.h"
#include "erl_bif_unique.h"
+#include "erl_utils.h"
#undef ERTS_WANT_NFUNC_SCHED_INTERNALS__
#define ERTS_WANT_NFUNC_SCHED_INTERNALS__
#include "erl_nfunc_sched.h"
@@ -1210,6 +1211,22 @@ int enif_compare(Eterm lhs, Eterm rhs)
return result;
}
+ErlNifUInt64 enif_hash(ErlNifHash type, Eterm term, ErlNifUInt64 salt)
+{
+ switch (type) {
+ case ERL_NIF_INTERNAL_HASH:
+ return make_internal_hash(term, (Uint32) salt);
+ case ERL_NIF_PHASH2:
+ /* It appears that make_hash2 doesn't always react to seasoning
+ * as well as it should. Therefore, let's make it ignore the salt
+ * value and declare salted uses of phash2 as unsupported.
+ */
+ return make_hash2(term) & ((1 << 27) - 1);
+ default:
+ return 0;
+ }
+}
+
int enif_get_tuple(ErlNifEnv* env, Eterm tpl, int* arity, const Eterm** array)
{
Eterm* ptr;
diff --git a/erts/emulator/beam/erl_nif.h b/erts/emulator/beam/erl_nif.h
index ac45f3ac81..5a81f5fbbb 100644
--- a/erts/emulator/beam/erl_nif.h
+++ b/erts/emulator/beam/erl_nif.h
@@ -236,6 +236,11 @@ typedef enum {
ERL_NIF_BIN2TERM_SAFE = 0x20000000
} ErlNifBinaryToTerm;
+typedef enum {
+ ERL_NIF_INTERNAL_HASH = 1,
+ ERL_NIF_PHASH2 = 2
+} ErlNifHash;
+
/*
* Return values from enif_thread_type(). Negative values
* reserved for specific types of non-scheduler threads.
diff --git a/erts/emulator/beam/erl_nif_api_funcs.h b/erts/emulator/beam/erl_nif_api_funcs.h
index 01d9e386ed..b34058f303 100644
--- a/erts/emulator/beam/erl_nif_api_funcs.h
+++ b/erts/emulator/beam/erl_nif_api_funcs.h
@@ -180,6 +180,7 @@ ERL_NIF_API_FUNC_DECL(ErlNifResourceType*,enif_open_resource_type_x,(ErlNifEnv*,
ERL_NIF_API_FUNC_DECL(int, enif_monitor_process,(ErlNifEnv*,void* obj,const ErlNifPid*,ErlDrvMonitor *monitor));
ERL_NIF_API_FUNC_DECL(int, enif_demonitor_process,(ErlNifEnv*,void* obj,const ErlDrvMonitor *monitor));
ERL_NIF_API_FUNC_DECL(int, enif_compare_monitors,(const ErlNifMonitor*,const ErlNifMonitor*));
+ERL_NIF_API_FUNC_DECL(ErlNifUInt64,enif_hash,(ErlNifHash type, ERL_NIF_TERM term, ErlNifUInt64 salt));
/*
** ADD NEW ENTRIES HERE (before this comment) !!!
@@ -342,6 +343,7 @@ ERL_NIF_API_FUNC_DECL(int, enif_compare_monitors,(const ErlNifMonitor*,const Erl
# define enif_monitor_process ERL_NIF_API_FUNC_MACRO(enif_monitor_process)
# define enif_demonitor_process ERL_NIF_API_FUNC_MACRO(enif_demonitor_process)
# define enif_compare_monitors ERL_NIF_API_FUNC_MACRO(enif_compare_monitors)
+# define enif_hash ERL_NIF_API_FUNC_MACRO(enif_hash)
/*
** ADD NEW ENTRIES HERE (before this comment)
diff --git a/erts/emulator/beam/erl_process_dict.c b/erts/emulator/beam/erl_process_dict.c
index 7cfdf20341..01e240c65d 100644
--- a/erts/emulator/beam/erl_process_dict.c
+++ b/erts/emulator/beam/erl_process_dict.c
@@ -57,7 +57,7 @@
((is_small(Term)) ? (Uint32) unsigned_val(Term) : \
((is_atom(Term)) ? \
(Uint32) atom_val(Term) : \
- make_internal_hash(Term)))
+ make_internal_hash(Term, 0)))
#define PD_SZ2BYTES(Sz) (sizeof(ProcDict) + ((Sz) - 1)*sizeof(Eterm))
diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c
index 870f1f142d..a5fc3a2477 100644
--- a/erts/emulator/beam/erl_trace.c
+++ b/erts/emulator/beam/erl_trace.c
@@ -3229,7 +3229,7 @@ static int tracer_cmp_fun(void* a, void* b)
static HashValue tracer_hash_fun(void* obj)
{
- return make_internal_hash(((ErtsTracerNif*)obj)->module);
+ return make_internal_hash(((ErtsTracerNif*)obj)->module, 0);
}
static void *tracer_alloc_fun(void* tmpl)
diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h
index 47289a0af1..75d7e47239 100644
--- a/erts/emulator/beam/erl_utils.h
+++ b/erts/emulator/beam/erl_utils.h
@@ -120,7 +120,7 @@ int erts_is_builtin(Eterm, Eterm, int);
Uint32 block_hash(byte *, unsigned, Uint32);
Uint32 make_hash2(Eterm);
Uint32 make_hash(Eterm);
-Uint32 make_internal_hash(Eterm);
+Uint32 make_internal_hash(Eterm, Uint32 salt);
void erts_save_emu_args(int argc, char **argv);
Eterm erts_get_emu_args(struct process *c_p);
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index cdf766b206..96a7bfe8ac 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -1578,13 +1578,13 @@ do { /* Lightweight mixing of constant (type info) */ \
} while (0)
Uint32
-make_internal_hash(Eterm term)
+make_internal_hash(Eterm term, Uint32 salt)
{
Uint32 hash;
/* Optimization. Simple cases before declaration of estack. */
if (primary_tag(term) == TAG_PRIMARY_IMMED1) {
- hash = 0;
+ hash = salt;
#if ERTS_SIZEOF_ETERM == 8
UINT32_HASH_2((Uint32)term, (Uint32)(term >> 32), HCONST);
#elif ERTS_SIZEOF_ETERM == 4
@@ -1598,7 +1598,7 @@ make_internal_hash(Eterm term)
Eterm tmp;
DECLARE_ESTACK(s);
- hash = 0;
+ hash = salt;
for (;;) {
switch (primary_tag(term)) {
case TAG_PRIMARY_LIST:
@@ -1874,7 +1874,7 @@ make_internal_hash(Eterm term)
goto pop_next;
}
default:
- erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_hash2(0x%X)\n", term);
+ erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_internal_hash(0x%X, %lu)\n", term, salt);
}
}
break;
@@ -1887,7 +1887,7 @@ make_internal_hash(Eterm term)
goto pop_next;
default:
- erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_hash2(0x%X)\n", term);
+ erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_internal_hash(0x%X, %lu)\n", term, salt);
pop_next:
if (ESTACK_ISEMPTY(s)) {
diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl
index 693db42e58..8ad11d3bf3 100644
--- a/erts/emulator/test/nif_SUITE.erl
+++ b/erts/emulator/test/nif_SUITE.erl
@@ -56,7 +56,10 @@
nif_is_process_alive/1, nif_is_port_alive/1,
nif_term_to_binary/1, nif_binary_to_term/1,
nif_port_command/1,
- nif_snprintf/1
+ nif_snprintf/1,
+ nif_internal_hash/1,
+ nif_internal_hash_salted/1,
+ nif_phash2/1
]).
-export([many_args_100/100]).
@@ -90,7 +93,10 @@ all() ->
nif_is_process_alive, nif_is_port_alive,
nif_term_to_binary, nif_binary_to_term,
nif_port_command,
- nif_snprintf].
+ nif_snprintf,
+ nif_internal_hash,
+ nif_internal_hash_salted,
+ nif_phash2].
groups() ->
[{G, [], api_repeaters()} || G <- api_groups()]
@@ -2610,6 +2616,154 @@ nif_snprintf(Config) ->
<<"{{hello,world,-33},",0>> = format_term_nif(20,{{hello,world, -33}, 3.14, self()}),
ok.
+nif_internal_hash(Config) ->
+ ensure_lib_loaded(Config),
+ HashValueBitSize = nif_hash_result_bitsize(internal),
+ Terms = unique([random_term() || _ <- lists:seq(1, 5000)]),
+ HashValues = [hash_nif(internal, Term, 0) || Term <- Terms],
+ test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05).
+
+nif_internal_hash_salted(Config) ->
+ ensure_lib_loaded(Config),
+ test_salted_nif_hash(internal).
+
+nif_phash2(Config) ->
+ ensure_lib_loaded(Config),
+ HashValueBitSize = nif_hash_result_bitsize(phash2),
+ Terms = unique([random_term() || _ <- lists:seq(1, 5000)]),
+ HashValues =
+ lists:map(
+ fun (Term) ->
+ HashValue = erlang:phash2(Term),
+ Salt = random_uint32(), % phash2 should ignore salt
+ NifHashValue = hash_nif(phash2, Term, Salt),
+ (HashValue =:= NifHashValue
+ orelse ct:fail("Expected: ~p\nActual: ~p",
+ [HashValue, NifHashValue])),
+ HashValue
+ end,
+ Terms),
+ test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05).
+
+test_salted_nif_hash(HashType) ->
+ HashValueBitSize = nif_hash_result_bitsize(HashType),
+ Terms = unique([random_term() || _ <- lists:seq(1, 5000)]),
+ Salts = unique([random_uint32() || _ <- lists:seq(1, 100)]),
+ {HashValuesPerSalt, HashValuesPerTerm} =
+ lists:mapfoldl(
+ fun (Salt, Acc) ->
+ {HashValues, NewAcc} =
+ lists:mapfoldl(
+ fun (Term, AccB) ->
+ HashValue = hash_nif(HashType, Term, Salt),
+ NewAccB = dict:append(Term, HashValue, AccB),
+ {HashValue, NewAccB}
+ end,
+ Acc,
+ Terms),
+ {{Salt, HashValues}, NewAcc}
+ end,
+ dict:new(),
+ Salts),
+
+ % Test per-salt hash distribution of different terms
+ lists:foreach(
+ fun ({_Salt, HashValues}) ->
+ test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.05)
+ end,
+ HashValuesPerSalt),
+
+ % Test per-term hash distribution of different salts
+ dict:fold(
+ fun (_Term, HashValues, Acc) ->
+ % Be more tolerant of relative deviation,
+ % as there's fewer hash values here.
+ test_bit_distribution_fitness(HashValues, HashValueBitSize, 0.30),
+ Acc
+ end,
+ ok,
+ HashValuesPerTerm).
+
+test_bit_distribution_fitness(Integers, BitSize, MaxRelativeDeviation) ->
+ MaxInteger = (1 bsl BitSize) - 1,
+ OnesPerBit =
+ lists:foldl(
+ fun (Integer, Acc) when Integer >= 0, Integer =< MaxInteger ->
+ lists:foldl(
+ fun (BitIndex, AccB) ->
+ BitValue = (Integer band (1 bsl BitIndex)) bsr BitIndex,
+ orddict:update_counter(BitIndex, BitValue, AccB)
+ end,
+ Acc,
+ lists:seq(0, BitSize - 1))
+ end,
+ orddict:new(),
+ Integers),
+
+ ExpectedNrOfOnes = length(Integers) div 2,
+ FailureText =
+ orddict:fold(
+ fun (BitIndex, NrOfOnes, Acc) ->
+ RelativeDeviation = abs(NrOfOnes - ExpectedNrOfOnes) / length(Integers),
+ case RelativeDeviation >= MaxRelativeDeviation of
+ false -> Acc;
+ true ->
+ [Acc,
+ io_lib:format(
+ "Unreasonable deviation on number of set bits (i=~p): "
+ "expected ~p, got ~p (relative dev. ~.3f)~n",
+ [BitIndex, ExpectedNrOfOnes, NrOfOnes, RelativeDeviation])]
+ end
+ end,
+ [],
+ OnesPerBit),
+
+ (FailureText =:= [] orelse ct:fail(FailureText)).
+
+nif_hash_result_bitsize(internal) -> 32;
+nif_hash_result_bitsize(phash2) -> 27.
+
+unique(List) ->
+ lists:usort(List).
+
+random_uint32() ->
+ rand:uniform(1 bsl 32) - 1.
+
+random_term() ->
+ case rand:uniform(6) of
+ 1 -> rand:uniform(1 bsl 27) - 1; % small
+ 2 -> (1 bsl 27) + rand:uniform(1 bsl 128); % big
+ 3 -> random_sign() * (rand:uniform() * (1 bsl 53)); % float
+ 4 -> random_binary();
+ 5 -> random_pid();
+ 6 ->
+ Length = rand:uniform(10),
+ List = [random_term() || _ <- lists:seq(1, Length)],
+ case rand:uniform(2) of
+ 1 ->
+ List;
+ 2 ->
+ list_to_tuple(List)
+ end
+ end.
+
+random_sign() ->
+ case rand:uniform(2) of
+ 1 -> -1.0;
+ 2 -> 1.0
+ end.
+
+random_binary() ->
+ list_to_binary(random_bytes(rand:uniform(32) - 1)).
+
+random_bytes(0) ->
+ [];
+random_bytes(N) when N > 0 ->
+ [rand:uniform(256) - 1 | random_bytes(N - 1)].
+
+random_pid() ->
+ Processes = erlang:processes(),
+ lists:nth(rand:uniform(length(Processes)), Processes).
%% The NIFs:
lib_version() -> undefined.
@@ -2621,6 +2775,7 @@ type_test() -> ?nif_stub.
tuple_2_list(_) -> ?nif_stub.
is_identical(_,_) -> ?nif_stub.
compare(_,_) -> ?nif_stub.
+hash_nif(_Type, _Term, _Salt) -> ?nif_stub.
many_args_100(_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_) -> ?nif_stub.
clone_bin(_) -> ?nif_stub.
make_sub_bin(_,_,_) -> ?nif_stub.
diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
index 8fe5ee809a..a255c9f096 100644
--- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
+++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
@@ -687,6 +687,31 @@ static ERL_NIF_TERM compare(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
return enif_make_int(env, enif_compare(argv[0],argv[1]));
}
+static ERL_NIF_TERM hash_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ if (argc != 3) {
+ return enif_make_badarg(env);
+ }
+
+ ErlNifHash type;
+ if (enif_is_identical(argv[0], enif_make_atom(env, "internal"))) {
+ type = ERL_NIF_INTERNAL_HASH;
+ }
+ else if (enif_is_identical(argv[0], enif_make_atom(env, "phash2"))) {
+ type = ERL_NIF_PHASH2;
+ }
+ else {
+ return enif_make_badarg(env);
+ }
+
+ ErlNifUInt64 salt;
+ if (! enif_get_uint64(env, argv[2], &salt)) {
+ return enif_make_badarg(env);
+ }
+
+ return enif_make_uint64(env, enif_hash(type, argv[1], salt));
+}
+
static ERL_NIF_TERM many_args_100(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
int i, k;
@@ -2864,6 +2889,7 @@ static ErlNifFunc nif_funcs[] =
{"tuple_2_list", 1, tuple_2_list},
{"is_identical",2,is_identical},
{"compare",2,compare},
+ {"hash_nif",3,hash_nif},
{"many_args_100", 100, many_args_100},
{"clone_bin", 1, clone_bin},
{"make_sub_bin", 3, make_sub_bin},