From 0149a73d15df1f80cb46752ec3829f48c38dd230 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 21 Sep 2017 09:20:30 +0200 Subject: erts: Implement maps path iterator --- erts/emulator/beam/bif.tab | 8 +- erts/emulator/beam/erl_map.c | 288 +++++++++++++++++++++++++++++++-------- erts/emulator/beam/erl_map.h | 4 +- erts/emulator/test/map_SUITE.erl | 51 +++++++ 4 files changed, 289 insertions(+), 62 deletions(-) (limited to 'erts/emulator') diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 4ee5af14bc..f2c8331850 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -689,6 +689,10 @@ bif erts_internal:maps_to_list/2 # # New in 20.1 # - bif erlang:iolist_to_iovec/1 -bif erts_internal:map_get_index/2 \ No newline at end of file + +# +# New in 21.0 +# + +bif erts_internal:map_next/2 diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c index 37fcedbc6d..a5fda7b86d 100644 --- a/erts/emulator/beam/erl_map.c +++ b/erts/emulator/beam/erl_map.c @@ -90,6 +90,7 @@ static BIF_RETTYPE hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB, int swap_ struct HashmapMergeContext_*); static Export hashmap_merge_trap_export; static BIF_RETTYPE maps_merge_trap_1(BIF_ALIST_1); +static Uint hashmap_subtree_size(Eterm node); static Eterm hashmap_to_list(Process *p, Eterm map, Sint n); static Eterm hashmap_keys(Process *p, Eterm map); static Eterm hashmap_values(Process *p, Eterm map); @@ -1518,7 +1519,7 @@ trap: /* Yield */ return trap_ret; } -Uint hashmap_subtree_size(Eterm node) { +static Uint hashmap_subtree_size(Eterm node) { DECLARE_WSTACK(stack); Uint size = 0; @@ -3059,74 +3060,247 @@ static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[]) return res; } -static Uint32 -index_to_hash(Sint idx, Uint32 hval) -{ - Uint32 new_hval = 1; - while (1) { - if (hval & 0x1) { - idx--; - if (!idx) break; - } - new_hval = new_hval << 1; - hval = hval >> 1; - } - return new_hval; -} -BIF_RETTYPE erts_internal_map_get_index_2(BIF_ALIST_2) { - Sint idx; - Uint32 hval, sz; - Eterm *ptr, hdr; +/* + * The iterator work in about the same way for both + * flat- and hash-maps. It consists of a Path, Map + * term where Path describes where to find the + * term to get. + * + * In flatmap Path is just the index of the element + * in the flatmap array. + * + * In hashmap the Path is a bit pattern that describes + * which slot we should traverse in each hashmap node. + * Since each hashmap node can only be up to 16 elements + * large we use 4 bits per level in the path. + * + * So a Path with value 0x110 will first get the 0:th + * slot in the head node, and then the 1:st slot in the + * resulting node and then finally the 1:st slot in the + * node beneath. If that slot is not a leaf, then the path + * continues down the 0:th slot until it finds a leaf and + * returns that leaf. + * + * Once the leaf has been found, the Path to the next leaf + * is built using a stack that was created while traversing + * the tree down. + * + * The index can become a bignum, which complicates the code + * a bit. However it should be very rare that this happens + * even on a 32bit system as you would need a tree of depth + * 7 or more. + */ - if (!is_hashmap(BIF_ARG_2) || is_not_small(BIF_ARG_1)) - BIF_ERROR(BIF_P, BADARG); +#define PATH_ELEM_SIZE 4 +#define PATH_ELEM_MASK 0xf +#define PATH_ELEM(PATH) ((PATH) & PATH_ELEM_MASK) +#define PATH_ELEMS_PER_DIGIT (sizeof(ErtsDigit) * 8 / PATH_ELEM_SIZE) - idx = signed_val(BIF_ARG_1); +BIF_RETTYPE erts_internal_map_next_2(BIF_ALIST_2) { - if (idx < 1 || idx > 17) + Eterm *hp = NULL, *lst, + iter, path, map, k, v; + + path = BIF_ARG_1; + map = BIF_ARG_2; + + if (!is_map(map)) BIF_ERROR(BIF_P, BADARG); - ASSERT(is_boxed(BIF_ARG_2)); + if (path == am_none) + BIF_RET(am_none); - ptr = boxed_val(BIF_ARG_2); -again: - hdr = *ptr; + if (is_flatmap(map)) { + flatmap_t *mp = (flatmap_t *)flatmap_val(map); + Uint idx, sz = flatmap_get_size(mp); - switch(hdr & _HEADER_MAP_SUBTAG_MASK) { - case HAMT_SUBTAG_HEAD_ARRAY: - ptr++; - sz = 16; - hval = 0xffff; - break; - case HAMT_SUBTAG_HEAD_BITMAP: - ptr++; - if (ptr[0] == -1) { - ptr = boxed_val(ptr[1]); - goto again; + if (is_not_small(path)) + BIF_ERROR(BIF_P, BADARG); + + idx = unsigned_val(path); + + if (sz == 0) { + if (idx == 0) + BIF_RET(am_none); + else + BIF_ERROR(BIF_P, BADARG); } - case HAMT_SUBTAG_NODE_BITMAP: - hval = MAP_HEADER_VAL(hdr); - sz = hashmap_bitcount(hval); - ASSERT(sz < 17); - break; - default: - erts_exit(ERTS_ABORT_EXIT, "bad header"); - } - - if (idx <= sz) { - if (is_list(ptr[idx])) - return ptr[idx]; - else { - Eterm *hp = HAlloc(BIF_P, HAMT_HEAD_BITMAP_SZ(1)); - hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(index_to_hash(idx, hval)); - hp[1] = -1; - hp[2] = ptr[idx]; - return make_hashmap(hp); + + k = flatmap_get_keys(mp)[idx]; + v = flatmap_get_values(mp)[idx]; + + if (idx + 1 < sz) { + path = make_small(idx + 1); + } else if (idx < sz) { + path = am_none; + } else { + BIF_ERROR(BIF_P, BADARG); } } else { - return am_none; + Uint curr_path; + Uint path_length = 0; + Uint *path_rest = NULL; + int i; + Eterm node = map; + DECLARE_WSTACK(stack); + + ASSERT(is_hashmap(node)); + + if (is_small(path)) { + curr_path = unsigned_val(path); + } else if (is_big(path)) { + Eterm *big = big_val(path); + if (bignum_header_is_neg(*big)) + BIF_ERROR(BIF_P, BADARG); + path_length = BIG_ARITY(big) - 1; + curr_path = BIG_DIGIT(big, 0); + path_rest = BIG_V(big) + 1; + } else { + BIF_ERROR(BIF_P, BADARG); + } + + /* First we look for the K and V to return using the + path given. While doing so, we push each map node + and the index onto the stack so use later when + building the next index. */ + for (i = 1; ; i++) { + Eterm *ptr = hashmap_val(node), + hdr = *ptr; + Uint sz; + + ptr++; + + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + ptr++; + sz = 16; + break; + case HAMT_SUBTAG_HEAD_BITMAP: + ptr++; + case HAMT_SUBTAG_NODE_BITMAP: + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + ASSERT(sz < 17); + break; + default: + erts_exit(ERTS_ABORT_EXIT, "bad header"); + } + + if (PATH_ELEM(curr_path) >= sz) + BIF_ERROR(BIF_P, BADARG); + + WSTACK_PUSH2(stack, node, PATH_ELEM(curr_path)); + + /* We have found a leaf, return it and calculate the path to next */ + if (is_list(ptr[PATH_ELEM(curr_path)])) { + lst = list_val(ptr[PATH_ELEM(curr_path)]); + k = CAR(lst); + v = CDR(lst); + break; + } + + node = ptr[PATH_ELEM(curr_path)]; + + curr_path >>= PATH_ELEM_SIZE; + + if (i == PATH_ELEMS_PER_DIGIT) { + /* Switch to next bignum word if available, + otherwise just follow 0 path */ + i = 0; + if (path_length) { + curr_path = *path_rest; + path_length--; + path_rest++; + } else { + curr_path = 0; + } + } + } + + /* We pop the stack until we come to the next leaf to return */ + while (1) { + Uint idx = (Uint)WSTACK_POP(stack); + Eterm node = (Eterm)WSTACK_POP(stack); + Eterm *ptr = hashmap_val(node), + hdr = *ptr; + Uint sz; + + ptr++; + + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + ptr++; + sz = 16; + break; + case HAMT_SUBTAG_HEAD_BITMAP: + ptr++; + case HAMT_SUBTAG_NODE_BITMAP: + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + ASSERT(sz < 17); + break; + default: + erts_exit(ERTS_ABORT_EXIT, "bad header"); + } + + if (idx + 1 < sz) { + curr_path = idx + 1; + break; + } + + /* We have reached the end of the iteration */ + if (WSTACK_ISEMPTY(stack)) { + path = am_none; + break; + } + + } + + if (path != am_none) { + Uint depth = WSTACK_COUNT(stack) / 2 + 1; + /* +1 because we already have the first element in curr_path */ + Eterm *path_digits = NULL; + + /* If the path cannot fit in a small, we allocate a bignum */ + if (depth >= PATH_ELEMS_PER_DIGIT) { + /* We need multiple ErtsDigit's to represent the path */ + int big_size = BIG_NEED_FOR_BITS(depth * PATH_ELEM_SIZE); + hp = HAlloc(BIF_P, big_size); + hp[0] = make_pos_bignum_header(big_size - BIG_NEED_SIZE(0)); + path_digits = hp + big_size - 1; + } + + /* Pop the stack to create the complete path to the next leaf */ + while(!WSTACK_ISEMPTY(stack)) { + Uint idx = (Uint)WSTACK_POP(stack); + (void)WSTACK_POP(stack); + + depth--; + if (depth % PATH_ELEMS_PER_DIGIT == 0) { + path_digits[0] = curr_path; + path_digits--; + curr_path = 0; + } + + curr_path <<= PATH_ELEM_SIZE; + curr_path |= idx; + } + + if (path_digits) { + path_digits[0] = curr_path; + path = make_big(hp); + } else { + /* The Uint could be too large for a small */ + path = erts_make_integer(curr_path, BIF_P); + } + } + + DESTROY_WSTACK(stack); } + + hp = HAlloc(BIF_P, 4 + 3 /* 3-tuple + 2-tuple */); + iter = TUPLE2(hp, path, map); + hp += 3; + BIF_RET(TUPLE3(hp, k, v, iter)); } /* implementation of builtin emulations */ diff --git a/erts/emulator/beam/erl_map.h b/erts/emulator/beam/erl_map.h index d910e98398..718d400e22 100644 --- a/erts/emulator/beam/erl_map.h +++ b/erts/emulator/beam/erl_map.h @@ -56,7 +56,7 @@ typedef struct flatmap_s { /* the head-node is a bitmap or array with an untagged size */ -#define hashmap_size(x) (((hashmap_head_t*) hashmap_val(x))->size == -1 ? hashmap_subtree_size(x) : ((hashmap_head_t*) hashmap_val(x))->size) +#define hashmap_size(x) (((hashmap_head_t*) hashmap_val(x))->size) #define hashmap_make_hash(Key) make_internal_hash(Key, 0) #define hashmap_restore_hash(Heap,Lvl,Key) \ @@ -64,7 +64,6 @@ typedef struct flatmap_s { #define hashmap_shift_hash(Heap,Hx,Lvl,Key) \ (((++(Lvl)) & 7) ? (Hx) >> 4 : hashmap_make_hash(CONS(Heap, make_small((Lvl)>>3), Key))) - /* erl_term.h stuff */ #define flatmap_get_values(x) (((Eterm *)(x)) + sizeof(flatmap_t)/sizeof(Eterm)) #define flatmap_get_keys(x) (((Eterm *)tuple_val(((flatmap_t *)(x))->keys)) + 1) @@ -106,7 +105,6 @@ Eterm erts_hashmap_from_ks_and_vs_extra(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs, Uint n, Eterm k, Eterm v); -Uint hashmap_subtree_size(Eterm node); const Eterm *erts_maps_get(Eterm key, Eterm map); const Eterm *erts_hashmap_get(Uint32 hx, Eterm key, Eterm map); diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl index 02f3c89318..3cf071b590 100644 --- a/erts/emulator/test/map_SUITE.erl +++ b/erts/emulator/test/map_SUITE.erl @@ -53,6 +53,7 @@ t_bif_map_to_list/1, t_bif_map_from_list/1, t_bif_erts_internal_maps_to_list/1, + t_bif_map_next/1, %% erlang t_erlang_hash/1, @@ -120,6 +121,7 @@ all() -> [t_build_and_match_literals, t_build_and_match_literals_large, t_bif_map_values, t_bif_map_to_list, t_bif_map_from_list, t_bif_erts_internal_maps_to_list, + t_bif_map_next, %% erlang t_erlang_hash, t_map_encode_decode, @@ -2399,6 +2401,55 @@ t_bif_erts_internal_maps_to_list(Config) when is_list(Config) -> {'EXIT', {badarg,_}} = (catch erts_internal:maps_to_list(id(#{1=>2}),id(<<>>))), ok. +t_bif_map_next(Config) when is_list(Config) -> + + erts_debug:set_internal_state(available_internal_state, true), + + try + + none = maps:next(maps:iterator(id(#{}))), + + verify_iterator(#{}), + verify_iterator(#{a => 1, b => 2, c => 3}), + + %% Use fatmap in order to test iterating in very deep maps + FM = fatmap(43), + verify_iterator(FM), + + {'EXIT', {{badmap,[{a,b},b]},_}} = (catch maps:iterator(id([{a,b},b]))), + {'EXIT', {badarg,_}} = (catch maps:next(id(a))), + {'EXIT', {badarg,_}} = (catch maps:next(id([a|FM]))), + {'EXIT', {badarg,_}} = (catch maps:next(id([1|#{}]))), + {'EXIT', {badarg,_}} = (catch maps:next(id([16#FFFFFFF|FM]))), + {'EXIT', {badarg,_}} = (catch maps:next(id([16#F0F0F0F|FM]))), + {'EXIT', {badarg,_}} = (catch maps:next(id([16#1234567890ABCDEF|FM]))), + + ok + after + erts_debug:set_internal_state(available_internal_state, false) + end. + +verify_iterator(Map) -> + KVs = t_fold(fun(K, V, A) -> [{K, V} | A] end, [], Map), + + %% Verify that KVs created by iterating Map is of + %% correct size and contains all elements + true = length(KVs) == maps:size(Map), + [maps:get(K, Map) || {K, _} <- KVs], + ok. + + +t_fold(Fun, Init, Map) -> + t_fold_1(Fun, Init, maps:iterator(Map)). + +t_fold_1(Fun, Acc, Iter) -> + case maps:next(Iter) of + {K, V, NextIter} -> + t_fold_1(Fun, Fun(K,V,Acc), NextIter); + none -> + Acc + end. + t_bif_build_and_check(Config) when is_list(Config) -> ok = check_build_and_remove(750,[fun(K) -> [K,K] end, fun(K) -> [float(K),K] end, -- cgit v1.2.3