From 70bb13c626ffbffc9c7d6fbe1d69e91dd0a853be Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Tue, 17 Feb 2015 19:35:59 +0100 Subject: erts: Change phash2 of maps to be sensitive to key-value combos. The old hashing did not care which value belonged to which key, for example: would hash the same. --- erts/emulator/beam/utils.c | 67 ++++++++++++++++++---------------------- erts/emulator/test/map_SUITE.erl | 18 +++++------ 2 files changed, 39 insertions(+), 46 deletions(-) (limited to 'erts/emulator') diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index 3f9cb5dbea..d234e8fc68 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -1133,10 +1133,11 @@ Uint32 make_hash2(Eterm term) { Uint32 hash; - Uint32 hash_xor_keys = 0; - Uint32 hash_xor_values = 0; + Uint32 hash_xor_pairs; DeclareTmpHeapNoproc(tmp_big,2); + ERTS_UNDEF(hash_xor_pairs, 0); + /* (HCONST * {2, ..., 16}) mod 2^32 */ #define HCONST_2 0x3c6ef372UL #define HCONST_3 0xdaa66d2bUL @@ -1155,8 +1156,8 @@ make_hash2(Eterm term) #define HCONST_16 0xe3779b90UL #define HASH_MAP_TAIL (_make_header(1,_TAG_HEADER_REF)) -#define HASH_MAP_KEY (_make_header(2,_TAG_HEADER_REF)) -#define HASH_MAP_VAL (_make_header(3,_TAG_HEADER_REF)) +#define HASH_MAP_PAIR (_make_header(2,_TAG_HEADER_REF)) +#define HASH_CDR (_make_header(3,_TAG_HEADER_REF)) #define UINT32_HASH_2(Expr1, Expr2, AConst) \ do { \ @@ -1233,9 +1234,9 @@ make_hash2(Eterm term) if (c > 0) UINT32_HASH(sh, HCONST_4); if (is_list(term)) { - term = *ptr; - tmp = *++ptr; - ESTACK_PUSH(s, tmp); + tmp = CDR(ptr); + ESTACK_PUSH(s, tmp); + term = CAR(ptr); } } break; @@ -1271,20 +1272,20 @@ make_hash2(Eterm term) if (size == 0) { goto hash2_common; } - ESTACK_PUSH4(s, hash_xor_values, hash_xor_keys, hash, HASH_MAP_TAIL); - hash = 0; - hash_xor_keys = 0; - hash_xor_values = 0; - for (i = size - 1; i >= 0; i--) { - tmp = vs[i]; - ESTACK_PUSH2(s, HASH_MAP_VAL, tmp); - } - /* We do not want to expose the tuple representation. - * Do not push the keys as a tuple. + /* We want a portable hash function that is *independent* of + * the order in which keys and values are encountered. + * We therefore calculate context independent hashes for all . + * key-value pairs and then xor them together. */ + ESTACK_PUSH(s, hash_xor_pairs); + ESTACK_PUSH(s, hash); + ESTACK_PUSH(s, HASH_MAP_TAIL); + hash = 0; + hash_xor_pairs = 0; for (i = size - 1; i >= 0; i--) { - tmp = ks[i]; - ESTACK_PUSH2(s, HASH_MAP_KEY, tmp); + ESTACK_PUSH(s, HASH_MAP_PAIR); + ESTACK_PUSH(s, vs[i]); + ESTACK_PUSH(s, ks[i]); } goto hash2_common; } @@ -1301,13 +1302,11 @@ make_hash2(Eterm term) UINT32_HASH(size, HCONST_16); if (size == 0) goto hash2_common; - ESTACK_PUSH(s, hash_xor_values); - ESTACK_PUSH(s, hash_xor_keys); + ESTACK_PUSH(s, hash_xor_pairs); ESTACK_PUSH(s, hash); ESTACK_PUSH(s, HASH_MAP_TAIL); hash = 0; - hash_xor_keys = 0; - hash_xor_values = 0; + hash_xor_pairs = 0; } switch (hdr & _HEADER_MAP_SUBTAG_MASK) { case HAMT_SUBTAG_HEAD_ARRAY: @@ -1324,10 +1323,9 @@ make_hash2(Eterm term) while (i) { if (is_list(*ptr)) { Eterm* cons = list_val(*ptr); - ESTACK_PUSH(s, HASH_MAP_KEY); - ESTACK_PUSH(s, CAR(cons)); - ESTACK_PUSH(s, HASH_MAP_VAL); + ESTACK_PUSH(s, HASH_MAP_PAIR); ESTACK_PUSH(s, CDR(cons)); + ESTACK_PUSH(s, CAR(cons)); } else { ASSERT(is_boxed(*ptr)); @@ -1437,7 +1435,8 @@ make_hash2(Eterm term) do { Uint t; Uint32 x, y; - t = i < n ? BIG_DIGIT(ptr, i++) : 0; + ASSERT(i < n); + t = BIG_DIGIT(ptr, i++); x = t & 0xffffffff; y = t >> 32; UINT32_HASH_2(x, y, con); @@ -1545,18 +1544,12 @@ make_hash2(Eterm term) switch (term) { case HASH_MAP_TAIL: { hash = (Uint32) ESTACK_POP(s); - UINT32_HASH(hash_xor_keys, HCONST_16); - UINT32_HASH(hash_xor_values, HCONST_16); - hash_xor_keys = (Uint32) ESTACK_POP(s); - hash_xor_values = (Uint32) ESTACK_POP(s); + UINT32_HASH(hash_xor_pairs, HCONST_19); + hash_xor_pairs = (Uint32) ESTACK_POP(s); goto hash2_common; } - case HASH_MAP_KEY: - hash_xor_keys ^= hash; - hash = 0; - goto hash2_common; - case HASH_MAP_VAL: - hash_xor_values ^= hash; + case HASH_MAP_PAIR: + hash_xor_pairs ^= hash; hash = 0; goto hash2_common; default: diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl index 5944450f33..0205c52c98 100644 --- a/erts/emulator/test/map_SUITE.erl +++ b/erts/emulator/test/map_SUITE.erl @@ -976,21 +976,21 @@ t_erlang_hash(Config) when is_list(Config) -> t_bif_erlang_phash2() -> 39679005 = erlang:phash2(#{}), - 78942764 = erlang:phash2(#{ a => 1, "a" => 2, <<"a">> => 3, {a,b} => 4 }), - 37338230 = erlang:phash2(#{ 1 => a, 2 => "a", 3 => <<"a">>, 4 => {a,b} }), - 14363616 = erlang:phash2(#{ 1 => a }), - 51612236 = erlang:phash2(#{ a => 1 }), + 33667975 = erlang:phash2(#{ a => 1, "a" => 2, <<"a">> => 3, {a,b} => 4 }), % 78942764 + 95332690 = erlang:phash2(#{ 1 => a, 2 => "a", 3 => <<"a">>, 4 => {a,b} }), % 37338230 + 108954384 = erlang:phash2(#{ 1 => a }), % 14363616 + 59617982 = erlang:phash2(#{ a => 1 }), % 51612236 - 37468437 = erlang:phash2(#{{} => <<>>}), - 44049159 = erlang:phash2(#{<<>> => {}}), + 42770201 = erlang:phash2(#{{} => <<>>}), % 37468437 + 71687700 = erlang:phash2(#{<<>> => {}}), % 44049159 M0 = #{ a => 1, "key" => <<"value">> }, M1 = maps:remove("key",M0), M2 = M1#{ "key" => <<"value">> }, - 118679416 = erlang:phash2(M0), - 51612236 = erlang:phash2(M1), - 118679416 = erlang:phash2(M2), + 70249457 = erlang:phash2(M0), % 118679416 + 59617982 = erlang:phash2(M1), % 51612236 + 70249457 = erlang:phash2(M2), % 118679416 ok. t_bif_erlang_phash() -> -- cgit v1.2.3