diff options
Diffstat (limited to 'erts/emulator')
28 files changed, 5193 insertions, 994 deletions
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index e847403882..6526e87e4c 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -699,7 +699,7 @@ void** beam_ops; Fail; \ } -#define IsMap(Src, Fail) if (is_not_map(Src)) { Fail; } +#define IsMap(Src, Fail) if (!is_map(Src)) { Fail; } #define HasMapField(Src, Key, Fail) if (has_not_map_field(Src, Key)) { Fail; } @@ -2392,7 +2392,7 @@ void process_main(void) } OpCase(i_has_map_fields_fsI): { - map_t* mp; + flatmap_t* mp; Eterm map; Eterm field; Eterm *ks; @@ -2400,22 +2400,34 @@ void process_main(void) Uint sz,n; GetArg1(1, map); + n = (Uint)Arg(2); + fs = &Arg(3); /* pattern fields */ - /* this instruction assumes Arg1 is a map, - * i.e. that it follows a test is_map if needed. - */ + /* get term from field? */ + if (is_hashmap(map)) { + Uint32 hx; + while(n--) { + field = *fs++; + hx = hashmap_make_hash(field); + if (!erts_hashmap_get(hx,field,map)) { + SET_I((BeamInstr *) Arg(0)); + goto has_map_fields_fail; + } + } + goto has_map_fields_ok; + } + + ASSERT(is_flatmap(map)); - mp = (map_t *)map_val(map); - sz = map_get_size(mp); + mp = (flatmap_t *)flatmap_val(map); + sz = flatmap_get_size(mp); if (sz == 0) { SET_I((BeamInstr *) Arg(0)); goto has_map_fields_fail; } - ks = map_get_keys(mp); - n = (Uint)Arg(2); - fs = &Arg(3); /* pattern fields */ + ks = flatmap_get_keys(mp); ASSERT(n>0); @@ -2433,7 +2445,7 @@ void process_main(void) SET_I((BeamInstr *) Arg(0)); goto has_map_fields_fail; } - +has_map_fields_ok: I += 4 + Arg(2); has_map_fields_fail: ASSERT(VALID_INSTR(*I)); @@ -2460,12 +2472,8 @@ do { \ OpCase(i_get_map_elements_fsI): { Eterm map; - map_t *mp; - Eterm field; - Eterm *ks; - Eterm *vs; BeamInstr *fs; - Uint sz,n; + Uint sz, n; GetArg1(1, map); @@ -2473,36 +2481,56 @@ do { \ * i.e. that it follows a test is_map if needed. */ - mp = (map_t *)map_val(map); - sz = map_get_size(mp); - - if (sz == 0) { - SET_I((BeamInstr *) Arg(0)); - goto get_map_elements_fail; - } - n = (Uint)Arg(2) / 2; fs = &Arg(3); /* pattern fields and target registers */ - ks = map_get_keys(mp); - vs = map_get_values(mp); - while(sz) { - field = (Eterm)*fs; - if (EQ(field,*ks)) { - PUT_TERM_REG(*vs, fs[1]); - n--; + if (is_flatmap(map)) { + flatmap_t *mp; + Eterm *ks; + Eterm *vs; + + mp = (flatmap_t *)flatmap_val(map); + sz = flatmap_get_size(mp); + + if (sz == 0) { + SET_I((BeamInstr *) Arg(0)); + goto get_map_elements_fail; + } + + ks = flatmap_get_keys(mp); + vs = flatmap_get_values(mp); + + while(sz) { + if (EQ((Eterm)*fs,*ks)) { + PUT_TERM_REG(*vs, fs[1]); + n--; + fs += 2; + /* no more values to fetch, we are done */ + if (n == 0) break; + } + ks++; sz--; + vs++; + } + + if (n) { + SET_I((BeamInstr *) Arg(0)); + goto get_map_elements_fail; + } + } else { + const Eterm *v; + Uint32 hx; + ASSERT(is_hashmap(map)); + while(n--) { + hx = hashmap_make_hash((Eterm)*fs); + if ((v = erts_hashmap_get(hx,(Eterm)*fs, map)) == NULL) { + SET_I((BeamInstr *) Arg(0)); + goto get_map_elements_fail; + } + PUT_TERM_REG(*v, fs[1]); fs += 2; - /* no more values to fetch, we are done */ - if (n == 0) break; } - ks++; sz--; - vs++; } - if (n) { - SET_I((BeamInstr *) Arg(0)); - goto get_map_elements_fail; - } I += 4 + Arg(2); get_map_elements_fail: @@ -2801,6 +2829,7 @@ get_map_elements_fail: } PreFetch(1, next); ASSERT(!ERTS_PROC_IS_EXITING(c_p)); + ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p); reg[0] = r(0); result = (*bf)(c_p, reg, I); ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(result)); @@ -6443,55 +6472,69 @@ new_fun(Process* p, Eterm* reg, ErlFunEntry* fe, int num_free) static int has_not_map_field(Eterm map, Eterm key) { - map_t* mp; - Eterm* keys; - Uint i; - Uint n; - - mp = (map_t *)map_val(map); - keys = map_get_keys(mp); - n = map_get_size(mp); - if (is_immed(key)) { - for (i = 0; i < n; i++) { - if (keys[i] == key) { - return 0; + Uint32 hx; + if (is_flatmap(map)) { + flatmap_t* mp; + Eterm* keys; + Uint i; + Uint n; + + mp = (flatmap_t *)flatmap_val(map); + keys = flatmap_get_keys(mp); + n = flatmap_get_size(mp); + if (is_immed(key)) { + for (i = 0; i < n; i++) { + if (keys[i] == key) { + return 0; + } } - } - } else { - for (i = 0; i < n; i++) { - if (EQ(keys[i], key)) { - return 0; + } else { + for (i = 0; i < n; i++) { + if (EQ(keys[i], key)) { + return 0; + } } } + return 1; } - return 1; + ASSERT(is_hashmap(map)); + hx = hashmap_make_hash(key); + return erts_hashmap_get(hx,key,map) ? 0 : 1; } static Eterm get_map_element(Eterm map, Eterm key) { - map_t *mp; - Eterm* ks, *vs; - Uint i; - Uint n; - - mp = (map_t *)map_val(map); - ks = map_get_keys(mp); - vs = map_get_values(mp); - n = map_get_size(mp); - if (is_immed(key)) { - for (i = 0; i < n; i++) { - if (ks[i] == key) { - return vs[i]; + Uint32 hx; + const Eterm *vs; + if (is_flatmap(map)) { + flatmap_t *mp; + Eterm *ks; + Uint i; + Uint n; + + mp = (flatmap_t *)flatmap_val(map); + ks = flatmap_get_keys(mp); + vs = flatmap_get_values(mp); + n = flatmap_get_size(mp); + if (is_immed(key)) { + for (i = 0; i < n; i++) { + if (ks[i] == key) { + return vs[i]; + } } - } - } else { - for (i = 0; i < n; i++) { - if (EQ(ks[i], key)) { - return vs[i]; + } else { + for (i = 0; i < n; i++) { + if (EQ(ks[i], key)) { + return vs[i]; + } } } + return THE_NON_VALUE; } - return THE_NON_VALUE; + ASSERT(is_hashmap(map)); + hx = hashmap_make_hash(key); + vs = erts_hashmap_get(hx,key,map); + return vs ? *vs : THE_NON_VALUE; } #define GET_TERM(term, dest) \ @@ -6524,7 +6567,30 @@ new_map(Process* p, Eterm* reg, BeamInstr* I) Eterm *mhp,*thp; Eterm *E; BeamInstr *ptr; - map_t *mp; + flatmap_t *mp; + ErtsHeapFactory factory; + + ptr = &Arg(4); + + if (n > 2*MAP_SMALL_MAP_LIMIT) { + if (HeapWordsLeft(p) < n) { + erts_garbage_collect(p, n, reg, Arg(2)); + } + + mhp = p->htop; + thp = p->htop; + E = p->stop; + + for (i = 0; i < n/2; i++) { + GET_TERM(*ptr++, *mhp++); + GET_TERM(*ptr++, *mhp++); + } + + p->htop = mhp; + + factory.p = p; + return erts_hashmap_from_array(&factory, thp, n/2, 0); + } if (HeapWordsLeft(p) < need) { erts_garbage_collect(p, need, reg, Arg(2)); @@ -6533,11 +6599,10 @@ new_map(Process* p, Eterm* reg, BeamInstr* I) thp = p->htop; mhp = thp + 1 + n/2; E = p->stop; - ptr = &Arg(4); keys = make_tuple(thp); *thp++ = make_arityval(n/2); - mp = (map_t *)mhp; mhp += MAP_HEADER_SIZE; + mp = (flatmap_t *)mhp; mhp += MAP_HEADER_SIZE; mp->thing_word = MAP_HEADER; mp->size = n/2; mp->keys = keys; @@ -6547,7 +6612,7 @@ new_map(Process* p, Eterm* reg, BeamInstr* I) GET_TERM(*ptr++, *mhp++); } p->htop = mhp; - return make_map(mp); + return make_flatmap(mp); } static Eterm @@ -6557,7 +6622,7 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) Uint num_old; Uint num_updates; Uint need; - map_t *old_mp, *mp; + flatmap_t *old_mp, *mp; Eterm res; Eterm* hp; Eterm* E; @@ -6567,12 +6632,44 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) Eterm new_key; Eterm* kp; - if (is_not_map(map)) { - return THE_NON_VALUE; + new_p = &Arg(5); + num_updates = Arg(4) / 2; + + if (is_not_flatmap(map)) { + Uint32 hx; + Eterm val; + + /* apparently the compiler does not emit is_map instructions, + * bad compiler */ + + if (is_not_hashmap(map)) + return THE_NON_VALUE; + + res = map; + E = p->stop; + while(num_updates--) { + /* assoc can't fail */ + GET_TERM(new_p[0], new_key); + GET_TERM(new_p[1], val); + hx = hashmap_make_hash(new_key); + + res = erts_hashmap_insert(p, hx, new_key, val, res, 0); + if (p->mbuf) { + Uint live = Arg(3); + reg[live] = res; + erts_garbage_collect(p, 0, reg, live+1); + res = reg[live]; + } + + E = p->stop; + + new_p += 2; + } + return res; } - old_mp = (map_t *) map_val(map); - num_old = map_get_size(old_mp); + old_mp = (flatmap_t *) flatmap_val(map); + num_old = flatmap_get_size(old_mp); /* * If the old map is empty, create a new map. @@ -6587,14 +6684,13 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) * update list are new). */ - num_updates = Arg(4) / 2; need = 2*(num_old+num_updates) + 1 + MAP_HEADER_SIZE; if (HeapWordsLeft(p) < need) { Uint live = Arg(3); reg[live] = map; erts_garbage_collect(p, need, reg, live+1); map = reg[live]; - old_mp = (map_t *)map_val(map); + old_mp = (flatmap_t *)flatmap_val(map); } /* @@ -6625,16 +6721,15 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) kp = p->htop + 1; /* Point to first key */ hp = kp + num_old + num_updates; - res = make_map(hp); - mp = (map_t *)hp; + res = make_flatmap(hp); + mp = (flatmap_t *)hp; hp += MAP_HEADER_SIZE; mp->thing_word = MAP_HEADER; mp->keys = make_tuple(kp-1); - old_vals = map_get_values(old_mp); - old_keys = map_get_keys(old_mp); + old_vals = flatmap_get_values(old_mp); + old_keys = flatmap_get_keys(old_mp); - new_p = &Arg(5); GET_TERM(*new_p, new_key); n = num_updates; @@ -6720,8 +6815,19 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) n = kp - p->htop - 1; /* Actual number of keys/values */ *p->htop = make_arityval(n); + p->htop = hp; mp->size = n; - p->htop = hp; + + /* The expensive case, need to build a hashmap */ + if (n > MAP_SMALL_MAP_LIMIT) { + res = erts_hashmap_from_ks_and_vs(p,flatmap_get_keys(mp),flatmap_get_values(mp),n); + if (p->mbuf) { + Uint live = Arg(3); + reg[live] = res; + erts_garbage_collect(p, 0, reg, live+1); + res = reg[live]; + } + } return res; } @@ -6736,7 +6842,7 @@ update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I) Uint i; Uint num_old; Uint need; - map_t *old_mp, *mp; + flatmap_t *old_mp, *mp; Eterm res; Eterm* hp; Eterm* E; @@ -6745,12 +6851,48 @@ update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I) BeamInstr* new_p; Eterm new_key; - if (is_not_map(map)) { - return THE_NON_VALUE; + new_p = &Arg(5); + n = Arg(4) / 2; /* Number of values to be updated */ + ASSERT(n > 0); + + if (is_not_flatmap(map)) { + Uint32 hx; + Eterm val; + + /* apparently the compiler does not emit is_map instructions, + * bad compiler */ + + if (is_not_hashmap(map)) + return THE_NON_VALUE; + + res = map; + E = p->stop; + while(n--) { + /* assoc can't fail */ + GET_TERM(new_p[0], new_key); + GET_TERM(new_p[1], val); + hx = hashmap_make_hash(new_key); + + res = erts_hashmap_insert(p, hx, new_key, val, res, 1); + if (is_non_value(res)) + return res; + + if (p->mbuf) { + Uint live = Arg(3); + reg[live] = res; + erts_garbage_collect(p, 0, reg, live+1); + res = reg[live]; + } + + E = p->stop; + + new_p += 2; + } + return res; } - old_mp = (map_t *) map_val(map); - num_old = map_get_size(old_mp); + old_mp = (flatmap_t *) flatmap_val(map); + num_old = flatmap_get_size(old_mp); /* * If the old map is empty, create a new map. @@ -6770,7 +6912,7 @@ update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I) reg[live] = map; erts_garbage_collect(p, need, reg, live+1); map = reg[live]; - old_mp = (map_t *)map_val(map); + old_mp = (flatmap_t *)flatmap_val(map); } /* @@ -6780,23 +6922,20 @@ update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I) hp = p->htop; E = p->stop; - old_vals = map_get_values(old_mp); - old_keys = map_get_keys(old_mp); + old_vals = flatmap_get_values(old_mp); + old_keys = flatmap_get_keys(old_mp); - res = make_map(hp); - mp = (map_t *)hp; + res = make_flatmap(hp); + mp = (flatmap_t *)hp; hp += MAP_HEADER_SIZE; mp->thing_word = MAP_HEADER; mp->size = num_old; mp->keys = old_mp->keys; /* Get array of key/value pairs to be updated */ - new_p = &Arg(5); GET_TERM(*new_p, new_key); /* Update all values */ - n = Arg(4) / 2; /* Number of values to be updated */ - ASSERT(n > 0); for (i = 0; i < num_old; i++) { if (!EQ(*old_keys, new_key)) { /* Not same keys */ diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 788c866e63..5a0ee2ffc3 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -157,6 +157,8 @@ bif erts_internal:request_system_task/3 bif erts_internal:check_process_code/2 bif erts_internal:map_to_tuple_keys/1 +bif erts_internal:map_type/1 +bif erts_internal:map_hashmap_children/1 # inet_db support bif erlang:port_set_data/2 @@ -614,6 +616,7 @@ bif erlang:fun_info_mfa/1 bif erlang:get_keys/0 bif ets:update_counter/4 +bif erts_debug:map_info/1 # # Obsolete diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c index 4ede2c9d7d..e2fa572546 100644 --- a/erts/emulator/beam/break.c +++ b/erts/emulator/beam/break.c @@ -661,7 +661,6 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args) { #ifdef ERTS_SMP ErtsThrPrgrData tpd_buf; /* in case we aren't a managed thread... */ - int bc; #endif int fd; size_t envsz; @@ -681,7 +680,7 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args) /* Order all managed threads to block, this has to be done first to guarantee that this is the only thread to generate crash dump. */ - bc = erts_thr_progress_fatal_error_block(&tpd_buf); + erts_thr_progress_fatal_error_block(&tpd_buf); #ifdef ERTS_THR_HAVE_SIG_FUNCS /* diff --git a/erts/emulator/beam/copy.c b/erts/emulator/beam/copy.c index 0010f6a440..027b85b079 100644 --- a/erts/emulator/beam/copy.c +++ b/erts/emulator/beam/copy.c @@ -127,6 +127,35 @@ Uint size_object(Eterm obj) obj = *bptr; break; } + case HASHMAP_SUBTAG: + switch (MAP_HEADER_TYPE(hdr)) { + case MAP_HEADER_TAG_HAMT_HEAD_BITMAP : + case MAP_HEADER_TAG_HAMT_HEAD_ARRAY : + case MAP_HEADER_TAG_HAMT_NODE_BITMAP : + { + Eterm *head; + Uint sz; + head = hashmap_val_rel(obj, base); + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + sum += 1 + sz + header_arity(hdr); + head += 1 + header_arity(hdr); + + if (sz == 0) { + goto pop_next; + } + while(sz-- > 1) { + obj = head[sz]; + if (!IS_CONST(obj)) { + ESTACK_PUSH(s, obj); + } + } + obj = head[0]; + } + break; + default: + erl_exit(ERTS_ABORT_EXIT, "size_object: bad hashmap type %d\n", MAP_HEADER_TYPE(hdr)); + } + break; case SUB_BINARY_SUBTAG: { Eterm real_bin; @@ -157,10 +186,10 @@ Uint size_object(Eterm obj) case MAP_SUBTAG: { Uint n; - map_t *mp; - mp = (map_t*)map_val_rel(obj,base); + flatmap_t *mp; + mp = (flatmap_t*)flatmap_val_rel(obj,base); ptr = (Eterm *)mp; - n = map_get_size(mp) + 1; + n = flatmap_get_size(mp) + 1; sum += n + 2; ptr += 2; /* hdr + size words */ while (n--) { @@ -342,8 +371,8 @@ Eterm copy_struct(Eterm obj, Uint sz, Eterm** hpp, ErlOffHeap* off_heap) break; case MAP_SUBTAG: { - i = map_get_size(objp) + 3; - *argp = make_map_rel(htop, dst_base); + i = flatmap_get_size(objp) + 3; + *argp = make_flatmap_rel(htop, dst_base); while (i--) { *htop++ = *objp++; } @@ -459,7 +488,7 @@ Eterm copy_struct(Eterm obj, Uint sz, Eterm** hpp, ErlOffHeap* off_heap) { ExternalThing *etp = (ExternalThing *) htop; - i = thing_arityval(hdr) + 1; + i = thing_arityval(hdr) + 1; tp = htop; while (i--) { @@ -473,6 +502,21 @@ Eterm copy_struct(Eterm obj, Uint sz, Eterm** hpp, ErlOffHeap* off_heap) *argp = make_external_rel(tp, dst_base); } break; + case HASHMAP_SUBTAG: + tp = htop; + switch (MAP_HEADER_TYPE(hdr)) { + case MAP_HEADER_TAG_HAMT_HEAD_BITMAP : + case MAP_HEADER_TAG_HAMT_HEAD_ARRAY : + *htop++ = *objp++; + case MAP_HEADER_TAG_HAMT_NODE_BITMAP : + i = 1 + hashmap_bitcount(MAP_HEADER_VAL(hdr)); + while (i--) { *htop++ = *objp++; } + *argp = make_hashmap_rel(tp, dst_base); + break; + default: + erl_exit(ERTS_ABORT_EXIT, "copy_struct: bad hashmap type %d\n", MAP_HEADER_TYPE(hdr)); + } + break; case BIN_MATCHSTATE_SUBTAG: erl_exit(ERTS_ABORT_EXIT, "copy_struct: matchstate term not allowed"); diff --git a/erts/emulator/beam/erl_bif_guard.c b/erts/emulator/beam/erl_bif_guard.c index bbd8aa31d9..e7d84ebda1 100644 --- a/erts/emulator/beam/erl_bif_guard.c +++ b/erts/emulator/beam/erl_bif_guard.c @@ -459,23 +459,25 @@ Eterm erts_gc_byte_size_1(Process* p, Eterm* reg, Uint live) Eterm erts_gc_map_size_1(Process* p, Eterm* reg, Uint live) { Eterm arg = reg[live]; - if (is_map(arg)) { - map_t *mp = (map_t*)map_val(arg); - Uint size = map_get_size(mp); - if (IS_USMALL(0, size)) { - return make_small(size); - } else { - Eterm* hp; - if (ERTS_NEED_GC(p, BIG_UINT_HEAP_SIZE)) { - erts_garbage_collect(p, BIG_UINT_HEAP_SIZE, reg, live); - } - hp = p->htop; - p->htop += BIG_UINT_HEAP_SIZE; - return uint_to_big(size, hp); - } + Eterm* hp; + Uint size; + if (is_flatmap(arg)) { + flatmap_t *mp = (flatmap_t*)flatmap_val(arg); + size = flatmap_get_size(mp); + } else if (is_hashmap(arg)) { + size = hashmap_size(arg); } else { BIF_ERROR(p, BADARG); } + if (IS_USMALL(0, size)) { + return make_small(size); + } + if (ERTS_NEED_GC(p, BIG_UINT_HEAP_SIZE)) { + erts_garbage_collect(p, BIG_UINT_HEAP_SIZE, reg, live); + } + hp = p->htop; + p->htop += BIG_UINT_HEAP_SIZE; + return uint_to_big(size, hp); } Eterm erts_gc_abs_1(Process* p, Eterm* reg, Uint live) diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c index 8668a87ba1..045c8ae135 100644 --- a/erts/emulator/beam/erl_db_hash.c +++ b/erts/emulator/beam/erl_db_hash.c @@ -174,7 +174,7 @@ static ERTS_INLINE void add_fixed_deletion(DbTableHash* tb, int ix) /* optimised version of make_hash (normal case? atomic key) */ #define MAKE_HASH(term) \ ((is_atom(term) ? (atom_tab(atom_val(term))->slot.bucket.hvalue) : \ - make_hash2(term)) % MAX_HASH) + make_internal_hash(term)) % MAX_HASH) #ifdef ERTS_SMP # define DB_HASH_LOCK_MASK (DB_HASH_LOCK_CNT-1) diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c index 60da35da56..9d699d4b22 100644 --- a/erts/emulator/beam/erl_db_util.c +++ b/erts/emulator/beam/erl_db_util.c @@ -231,7 +231,8 @@ typedef enum { matchConsA, /* Car is below Cdr */ matchConsB, /* Cdr is below Car (unusual) */ matchMkTuple, - matchMkMap, + matchMkFlatMap, + matchMkHashMap, matchCall0, matchCall1, matchCall2, @@ -1376,15 +1377,15 @@ restart: for (;;) { switch (t & _TAG_PRIMARY_MASK) { case TAG_PRIMARY_BOXED: - if (is_map(t)) { - num_iters = map_get_size(map_val(t)); + if (is_flatmap(t)) { + num_iters = flatmap_get_size(flatmap_val(t)); if (!structure_checked) { DMC_PUSH(text, matchMap); DMC_PUSH(text, num_iters); } structure_checked = 0; for (i = 0; i < num_iters; ++i) { - Eterm key = map_get_keys(map_val(t))[i]; + Eterm key = flatmap_get_keys(flatmap_val(t))[i]; if (db_is_variable(key) >= 0) { if (context.err_info) { add_dmc_err(context.err_info, @@ -1404,7 +1405,7 @@ restart: DMC_PUSH(text, dmc_private_copy(&context, key)); { int old_stack = ++(context.stack_used); - Eterm value = map_get_values(map_val(t))[i]; + Eterm value = flatmap_get_values(flatmap_val(t))[i]; res = dmc_one_term(&context, &heap, &stack, &text, value); ASSERT(res != retFail); @@ -1424,6 +1425,63 @@ restart: } break; } + if (is_hashmap(t)) { + DECLARE_WSTACK(wstack); + Eterm *kv; + num_iters = hashmap_size(t); + if (!structure_checked) { + DMC_PUSH(text, matchMap); + DMC_PUSH(text, num_iters); + } + structure_checked = 0; + + hashmap_iterator_init(&wstack, t, 0); + + while ((kv=hashmap_iterator_next(&wstack)) != NULL) { + Eterm key = CAR(kv); + Eterm value = CDR(kv); + if (db_is_variable(key) >= 0) { + if (context.err_info) { + add_dmc_err(context.err_info, + "Variable found in map key.", + -1, 0UL, dmcError); + } + DESTROY_WSTACK(wstack); + goto error; + } else if (key == am_Underscore) { + if (context.err_info) { + add_dmc_err(context.err_info, + "Underscore found in map key.", + -1, 0UL, dmcError); + } + DESTROY_WSTACK(wstack); + goto error; + } + DMC_PUSH(text, matchKey); + DMC_PUSH(text, dmc_private_copy(&context, key)); + { + int old_stack = ++(context.stack_used); + res = dmc_one_term(&context, &heap, &stack, &text, + value); + ASSERT(res != retFail); + if (res == retRestart) { + DESTROY_WSTACK(wstack); + goto restart; + } + if (old_stack != context.stack_used) { + ASSERT(old_stack + 1 == context.stack_used); + DMC_PUSH(text, matchSwap); + } + if (context.stack_used > context.stack_need) { + context.stack_need = context.stack_used; + } + DMC_PUSH(text, matchPop); + --(context.stack_used); + } + } + DESTROY_WSTACK(wstack); + break; + } if (!is_tuple(t)) { goto simple_term; } @@ -1950,24 +2008,38 @@ restart: FAIL(); } n = *pc++; - if (map_get_size(map_val_rel(*ep, base)) < n) { - FAIL(); - } - ep = map_val_rel(*ep, base); + if (is_flatmap_rel(*ep,base)) { + if (flatmap_get_size(flatmap_val_rel(*ep, base)) < n) { + FAIL(); + } + } else { + ASSERT(is_hashmap_rel(*ep,base)); + if (hashmap_size_rel(*ep, base) < n) { + FAIL(); + } + } + ep = flatmap_val_rel(*ep, base); break; case matchPushM: if (!is_map_rel(*ep, base)) { FAIL(); } n = *pc++; - if (map_get_size(map_val_rel(*ep, base)) < n) { - FAIL(); - } - *sp++ = map_val_rel(*ep++, base); + if (is_flatmap_rel(*ep,base)) { + if (flatmap_get_size(flatmap_val_rel(*ep, base)) < n) { + FAIL(); + } + } else { + ASSERT(is_hashmap_rel(*ep,base)); + if (hashmap_size_rel(*ep, base) < n) { + FAIL(); + } + } + *sp++ = flatmap_val_rel(*ep++, base); break; case matchKey: t = (Eterm) *pc++; - tp = erts_maps_get_rel(t, make_map_rel(ep, base), base); + tp = erts_maps_get_rel(t, make_flatmap_rel(ep, base), base); if (!tp) { FAIL(); } @@ -2079,23 +2151,38 @@ restart: } *esp++ = t; break; - case matchMkMap: + case matchMkFlatMap: n = *pc++; ehp = HAllocX(build_proc, 1 + MAP_HEADER_SIZE + n, HEAP_XTRA); t = *ehp++ = *--esp; { - map_t *m = (map_t *)ehp; + flatmap_t *m = (flatmap_t *)ehp; m->thing_word = MAP_HEADER; m->size = n; m->keys = t; } - t = make_map(ehp); + t = make_flatmap(ehp); ehp += MAP_HEADER_SIZE; while (n--) { *ehp++ = *--esp; } *esp++ = t; break; + case matchMkHashMap: + n = *pc++; + esp -= 2*n; + ehp = HAllocX(build_proc, 2*n, HEAP_XTRA); + { + ErtsHeapFactory factory; + Uint ix; + factory.p = build_proc; + for (ix = 0; ix < 2*n; ix++){ + ehp[ix] = esp[ix]; + } + t = erts_hashmap_from_array(&factory, ehp, n, 0); + } + *esp++ = t; + break; case matchCall0: bif = (Eterm (*)(Process*, ...)) *pc++; t = (*bif)(build_proc, bif_args); @@ -3286,10 +3373,10 @@ int db_has_variable(Eterm node) { while(arity--) { ESTACK_PUSH(s,*(++tuple)); } - } else if (is_map(node)) { - Eterm *values = map_get_values(map_val(node)); - int size = map_get_size(map_val(node)); - ESTACK_PUSH(s, ((map_t *) map_val(node))->keys); + } else if (is_flatmap(node)) { + Eterm *values = flatmap_get_values(flatmap_val(node)); + Uint size = flatmap_get_size(flatmap_val(node)); + ESTACK_PUSH(s, ((flatmap_t *) flatmap_val(node))->keys); while (size--) { ESTACK_PUSH(s, *(values++)); } @@ -3366,7 +3453,6 @@ static DMCRet dmc_one_term(DMCContext *context, Uint sz, sz2, sz3; Uint i, j; - switch (c & _TAG_PRIMARY_MASK) { case TAG_PRIMARY_IMMED1: if ((n = db_is_variable(c)) >= 0) { /* variable */ @@ -3454,7 +3540,14 @@ static DMCRet dmc_one_term(DMCContext *context, DMC_PUSH(*stack, c); break; case (_TAG_HEADER_MAP >> _TAG_PRIMARY_SIZE): - n = map_get_size(map_val(c)); + n = flatmap_get_size(flatmap_val(c)); + DMC_PUSH(*text, matchPushM); + ++(context->stack_used); + DMC_PUSH(*text, n); + DMC_PUSH(*stack, c); + break; + case (_TAG_HEADER_HASHMAP >> _TAG_PRIMARY_SIZE): + n = hashmap_size(c); DMC_PUSH(*text, matchPushM); ++(context->stack_used); DMC_PUSH(*text, n); @@ -3745,30 +3838,87 @@ static DMCRet dmc_map(DMCContext *context, DMCHeap *heap, DMC_STACK_TYPE(UWord) *text, Eterm t, int *constant) { - map_t *m = (map_t *)map_val(t); - Eterm *values = map_get_values(m); - int nelems = map_get_size(m); + int nelems; int constant_values; DMCRet ret; + if (is_flatmap(t)) { + flatmap_t *m = (flatmap_t *)flatmap_val(t); + Eterm *values = flatmap_get_values(m); - ret = dmc_array(context, heap, text, values, nelems, &constant_values); - if (ret != retOk) { - return ret; - } - if (constant_values) { - *constant = 1; + nelems = flatmap_get_size(m); + ret = dmc_array(context, heap, text, values, nelems, &constant_values); + + if (ret != retOk) { + return ret; + } + if (constant_values) { + *constant = 1; + return retOk; + } + DMC_PUSH(*text, matchPushC); + DMC_PUSH(*text, dmc_private_copy(context, m->keys)); + if (++context->stack_used > context->stack_need) { + context->stack_need = context->stack_used; + } + DMC_PUSH(*text, matchMkFlatMap); + DMC_PUSH(*text, nelems); + context->stack_used -= nelems; + *constant = 0; + return retOk; + } else { + DECLARE_WSTACK(wstack); + Eterm *kv; + int c; + + ASSERT(is_hashmap(t)); + + hashmap_iterator_init(&wstack, t, 1); + constant_values = 1; + nelems = hashmap_size(t); + + while ((kv=hashmap_iterator_prev(&wstack)) != NULL) { + if ((ret = dmc_expr(context, heap, text, CDR(kv), &c)) != retOk) { + DESTROY_WSTACK(wstack); + return ret; + } + if (!c) + constant_values = 0; + } + + if (constant_values) { + *constant = 1; + DESTROY_WSTACK(wstack); + return retOk; + } + + *constant = 0; + + hashmap_iterator_init(&wstack, t, 1); + + while ((kv=hashmap_iterator_prev(&wstack)) != NULL) { + /* push key */ + if ((ret = dmc_expr(context, heap, text, CAR(kv), &c)) != retOk) { + DESTROY_WSTACK(wstack); + return ret; + } + if (c) { + do_emit_constant(context, text, CAR(kv)); + } + /* push value */ + if ((ret = dmc_expr(context, heap, text, CDR(kv), &c)) != retOk) { + DESTROY_WSTACK(wstack); + return ret; + } + if (c) { + do_emit_constant(context, text, CDR(kv)); + } + } + DMC_PUSH(*text, matchMkHashMap); + DMC_PUSH(*text, nelems); + context->stack_used -= nelems; + DESTROY_WSTACK(wstack); return retOk; } - DMC_PUSH(*text, matchPushC); - DMC_PUSH(*text, dmc_private_copy(context, m->keys)); - if (++context->stack_used > context->stack_need) { - context->stack_need = context->stack_used; - } - DMC_PUSH(*text, matchMkMap); - DMC_PUSH(*text, nelems); - context->stack_used -= nelems; - *constant = 0; - return retOk; } static DMCRet dmc_whole_expression(DMCContext *context, @@ -5462,11 +5612,17 @@ void db_match_dis(Binary *bp) ++t; erts_printf("MkTuple\t%beu\n", n); break; - case matchMkMap: + case matchMkFlatMap: + ++t; + n = *t; + ++t; + erts_printf("MkFlatMap\t%beu\n", n); + break; + case matchMkHashMap: ++t; n = *t; ++t; - erts_printf("MkMapA\t%beu\n", n); + erts_printf("MkHashMap\t%beu\n", n); break; case matchOr: ++t; diff --git a/erts/emulator/beam/erl_gc.h b/erts/emulator/beam/erl_gc.h index bf0496c112..8afcb060a1 100644 --- a/erts/emulator/beam/erl_gc.h +++ b/erts/emulator/beam/erl_gc.h @@ -55,7 +55,9 @@ do { \ nelts = header_arity(HDR); \ switch ((HDR) & _HEADER_SUBTAG_MASK) { \ case SUB_BINARY_SUBTAG: nelts++; break; \ - case MAP_SUBTAG: nelts+=map_get_size(PTR) + 1; break; \ + case MAP_SUBTAG: nelts+=flatmap_get_size(PTR) + 1; break; \ + case HASHMAP_SUBTAG: nelts=hashmap_bitcount(MAP_HEADER_VAL(HDR)); \ + nelts += is_hashmap_header_head(HDR) ? 1 : 0; break; \ case FUN_SUBTAG: nelts+=((ErlFunThing*)(PTR))->num_free+1; break; \ } \ gval = make_boxed(HTOP); \ @@ -63,7 +65,6 @@ do { \ *HTOP++ = HDR; \ *PTR++ = gval; \ while (nelts--) *HTOP++ = *PTR++; \ - \ } while(0) #define in_area(ptr,start,nbytes) \ diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c index b2a16eb5ed..bd6da0a6f1 100644 --- a/erts/emulator/beam/erl_map.c +++ b/erts/emulator/beam/erl_map.c @@ -16,6 +16,9 @@ * * %CopyrightEnd% * + * hashmaps are an adaption of Rich Hickeys Persistent HashMaps + * which were an adaption of Phil Bagwells - Hash Array Mapped Tries + * * Author: Björn-Egil Dahlberg */ @@ -62,39 +65,78 @@ * - erts_internal:map_to_tuple_keys/1 */ +#ifndef DECL_AM +#define DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1) +#endif + +/* for hashmap_from_list/1 */ +typedef struct { + Uint32 hx; + Uint32 skip; + Uint i; + Eterm val; +} hxnode_t; + + +static Eterm flatmap_merge(Process *p, Eterm nodeA, Eterm nodeB); +static Eterm map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args); +static Eterm hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB); +static Eterm hashmap_to_list(Process *p, Eterm map); +static Eterm hashmap_keys(Process *p, Eterm map); +static Eterm hashmap_values(Process *p, Eterm map); +static Eterm hashmap_delete(Process *p, Uint32 hx, Eterm key, Eterm node); +static Eterm map_from_validated_list(Process *p, Eterm list, Uint size); +static Eterm hashmap_from_validated_list(Process *p, Eterm list, Uint size); +static Eterm hashmap_from_unsorted_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, int reject_dupkeys); +static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, int is_root); +static Eterm hashmap_from_chunked_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, int is_root); +static Eterm hashmap_info(Process *p, Eterm node); +static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[]); +static int hxnodecmp(hxnode_t* a, hxnode_t* b); +static int hxnodecmpkey(hxnode_t* a, hxnode_t* b); + /* erlang:map_size/1 * the corresponding instruction is implemented in: * beam/erl_bif_guard.c */ BIF_RETTYPE map_size_1(BIF_ALIST_1) { - if (is_map(BIF_ARG_1)) { + if (is_flatmap(BIF_ARG_1)) { Eterm *hp; Uint hsz = 0; - map_t *mp = (map_t*)map_val(BIF_ARG_1); - Uint n = map_get_size(mp); + flatmap_t *mp = (flatmap_t*)flatmap_val(BIF_ARG_1); + Uint n = flatmap_get_size(mp); erts_bld_uint(NULL, &hsz, n); hp = HAlloc(BIF_P, hsz); BIF_RET(erts_bld_uint(&hp, NULL, n)); + } else if (is_hashmap(BIF_ARG_1)) { + Eterm *head, *hp, res; + Uint size, hsz=0; + + head = hashmap_val(BIF_ARG_1); + size = head[1]; + (void) erts_bld_uint(NULL, &hsz, size); + hp = HAlloc(BIF_P, hsz); + res = erts_bld_uint(&hp, NULL, size); + BIF_RET(res); } BIF_ERROR(BIF_P, BADARG); } -/* maps:to_list/1 - */ +/* maps:to_list/1 */ BIF_RETTYPE maps_to_list_1(BIF_ALIST_1) { - if (is_map(BIF_ARG_1)) { + if (is_flatmap(BIF_ARG_1)) { Uint n; Eterm* hp; Eterm *ks,*vs, res, tup; - map_t *mp = (map_t*)map_val(BIF_ARG_1); + flatmap_t *mp = (flatmap_t*)flatmap_val(BIF_ARG_1); - ks = map_get_keys(mp); - vs = map_get_values(mp); - n = map_get_size(mp); + ks = flatmap_get_keys(mp); + vs = flatmap_get_values(mp); + n = flatmap_get_size(mp); hp = HAlloc(BIF_P, (2 + 3) * n); res = NIL; @@ -104,6 +146,8 @@ BIF_RETTYPE maps_to_list_1(BIF_ALIST_1) { } BIF_RET(res); + } else if (is_hashmap(BIF_ARG_1)) { + return hashmap_to_list(BIF_P, BIF_ARG_1); } BIF_ERROR(BIF_P, BADARG); @@ -120,34 +164,41 @@ erts_maps_get_rel(Eterm key, Eterm map, Eterm *map_base) erts_maps_get(Eterm key, Eterm map) #endif { - Eterm *ks, *vs; - map_t *mp; - Uint n, i; + Uint32 hx; + if (is_flatmap_rel(map, map_base)) { + Eterm *ks, *vs; + flatmap_t *mp; + Uint n, i; - mp = (map_t *)map_val_rel(map, map_base); - n = map_get_size(mp); + mp = (flatmap_t *)flatmap_val_rel(map, map_base); + n = flatmap_get_size(mp); - if (n == 0) { - return NULL; - } + if (n == 0) { + return NULL; + } - ks = (Eterm *)tuple_val_rel(mp->keys, map_base) + 1; - vs = map_get_values(mp); + ks = (Eterm *)tuple_val_rel(mp->keys, map_base) + 1; + vs = flatmap_get_values(mp); - if (is_immed(key)) { - for (i = 0; i < n; i++) { - if (ks[i] == key) { - return &vs[i]; - } - } - } + if (is_immed(key)) { + for (i = 0; i < n; i++) { + if (ks[i] == key) { + return &vs[i]; + } + } + } - for (i = 0; i < n; i++) { - if (eq_rel(ks[i], NULL, key, map_base)) { - return &vs[i]; - } + for (i = 0; i < n; i++) { + if (eq_rel(ks[i], map_base, key, NULL)) { + return &vs[i]; + } + } + return NULL; } - return NULL; + ASSERT(is_hashmap_rel(map, map_base)); + hx = hashmap_make_hash(key); + + return erts_hashmap_get_rel(hx, key, map, map_base); } BIF_RETTYPE maps_find_2(BIF_ALIST_2) { @@ -164,7 +215,6 @@ BIF_RETTYPE maps_find_2(BIF_ALIST_2) { *hp++ = *value; BIF_RET(res); } - BIF_RET(am_error); } BIF_ERROR(BIF_P, BADARG); @@ -202,13 +252,8 @@ BIF_RETTYPE maps_get_2(BIF_ALIST_2) { */ BIF_RETTYPE maps_from_list_1(BIF_ALIST_1) { - Eterm *kv, item = BIF_ARG_1; - Eterm *hp, *thp,*vs, *ks, keys, res; - map_t *mp; - Uint size = 0, unused_size = 0; - Sint c = 0; - Sint idx = 0; - + Eterm item = BIF_ARG_1, res, *kv; + Uint size = 0; if (is_list(item) || is_nil(item)) { /* Calculate size and check validity */ @@ -229,368 +274,1091 @@ BIF_RETTYPE maps_from_list_1(BIF_ALIST_1) { if (is_not_nil(item)) goto error; - hp = HAlloc(BIF_P, 3 + 1 + (2 * size)); - thp = hp; + if (size > MAP_SMALL_MAP_LIMIT) { + BIF_RET(hashmap_from_validated_list(BIF_P, BIF_ARG_1, size)); + } else { + BIF_RET(map_from_validated_list(BIF_P, BIF_ARG_1, size)); + } + } + +error: + + BIF_ERROR(BIF_P, BADARG); +} + +static Eterm map_from_validated_list(Process *p, Eterm list, Uint size) { + Eterm *kv, item = list; + Eterm *hp, *thp,*vs, *ks, keys, res; + flatmap_t *mp; + Uint unused_size = 0; + Sint c = 0; + Sint idx = 0; + + + hp = HAlloc(p, 3 + 1 + (2 * size)); + thp = hp; + keys = make_tuple(hp); + *hp++ = make_arityval(size); + ks = hp; + hp += size; + mp = (flatmap_t*)hp; + res = make_flatmap(mp); + hp += MAP_HEADER_SIZE; + vs = hp; + + mp->thing_word = MAP_HEADER; + mp->size = size; /* set later, might shrink*/ + mp->keys = keys; + + if (size == 0) + return res; + + /* first entry */ + kv = tuple_val(CAR(list_val(item))); + ks[0] = kv[1]; + vs[0] = kv[2]; + size = 1; + item = CDR(list_val(item)); + + /* insert sort key/value pairs */ + while(is_list(item)) { + + kv = tuple_val(CAR(list_val(item))); + + /* compare ks backwards + * idx represent word index to be written (hole position). + * We cannot copy the elements when searching since we might + * have an equal key. So we search for just the index first =( + * + * It is perhaps faster to move the values in the first pass. + * Check for uniqueness during insert phase and then have a + * second phace compacting the map if duplicates are found + * during insert. .. or do someother sort .. shell-sort perhaps. + */ + + idx = size; + + while(idx > 0 && (c = CMP_TERM(kv[1],ks[idx-1])) < 0) { idx--; } + + if (c == 0) { + /* last compare was equal, + * i.e. we have to release memory + * and overwrite that key/value + */ + ks[idx-1] = kv[1]; + vs[idx-1] = kv[2]; + unused_size++; + } else { + Uint i = size; + while(i > idx) { + ks[i] = ks[i-1]; + vs[i] = vs[i-1]; + i--; + } + ks[idx] = kv[1]; + vs[idx] = kv[2]; + size++; + } + item = CDR(list_val(item)); + } + + if (unused_size) { + /* the key tuple is embedded in the heap + * write a bignum to clear it. + */ + /* release values as normal since they are on the top of the heap */ + + ks[size] = make_pos_bignum_header(unused_size - 1); + HRelease(p, vs + size + unused_size, vs + size); + } + + *thp = make_arityval(size); + mp->size = size; + return res; +} + +#define swizzle32(D,S) \ + do { \ + (D) = ((S) & 0x0000000f) << 28 | ((S) & 0x000000f0) << 20 \ + | ((S) & 0x00000f00) << 12 | ((S) & 0x0000f000) << 4 \ + | ((S) & 0x000f0000) >> 4 | ((S) & 0x00f00000) >> 12 \ + | ((S) & 0x0f000000) >> 20 | ((S) & 0xf0000000) >> 28; \ + } while(0) + +#define maskval(V,L) (((V) >> ((7 - (L))*4)) & 0xf) +#define cdepth(V1,V2) (hashmap_clz((V1) ^ (V2)) >> 2) + +static Eterm hashmap_from_validated_list(Process *p, Eterm list, Uint size) { + Eterm item = list; + Eterm *hp; + Eterm *kv, res; + Eterm tmp[2]; + Uint32 sw, hx; + Uint ix = 0; + hxnode_t *hxns; + ErtsHeapFactory factory; + + ASSERT(size > 0); + + hp = HAlloc(p, (2 * size)); + + /* create tmp hx values and leaf ptrs */ + hxns = (hxnode_t *)erts_alloc(ERTS_ALC_T_TMP, size * sizeof(hxnode_t)); + + while(is_list(item)) { + res = CAR(list_val(item)); + kv = tuple_val(res); + hx = hashmap_restore_hash(tmp,0,kv[1]); + swizzle32(sw,hx); + hxns[ix].hx = sw; + hxns[ix].val = CONS(hp, kv[1], kv[2]); hp += 2; + hxns[ix].skip = 1; /* will be reassigned in from_array */ + hxns[ix].i = ix; + ix++; + item = CDR(list_val(item)); + } + + factory.p = p; + res = hashmap_from_unsorted_array(&factory, hxns, size, 0); + + erts_free(ERTS_ALC_T_TMP, (void *) hxns); + ERTS_VERIFY_UNUSED_TEMP_ALLOC(p); + + if (hashmap_size(res) <= MAP_SMALL_MAP_LIMIT) { + DECLARE_WSTACK(wstack); + Eterm *kv, *ks, *vs; + flatmap_t *mp; + Eterm keys; + Uint n = hashmap_size(res); + + /* build flat structure */ + hp = HAlloc(p, 3 + 1 + (2 * n)); keys = make_tuple(hp); - *hp++ = make_arityval(size); + *hp++ = make_arityval(n); ks = hp; - hp += size; - mp = (map_t*)hp; - res = make_map(mp); + hp += n; + mp = (flatmap_t*)hp; hp += MAP_HEADER_SIZE; vs = hp; mp->thing_word = MAP_HEADER; - mp->size = size; /* set later, might shrink*/ + mp->size = n; mp->keys = keys; - if (size == 0) - BIF_RET(res); + hashmap_iterator_init(&wstack, res, 0); + + while ((kv=hashmap_iterator_next(&wstack)) != NULL) { + *ks++ = CAR(kv); + *vs++ = CDR(kv); + } - item = BIF_ARG_1; + /* it cannot have multiple keys */ + erts_validate_and_sort_flatmap(mp); - /* first entry */ - kv = tuple_val(CAR(list_val(item))); - ks[0] = kv[1]; - vs[0] = kv[2]; - size = 1; - item = CDR(list_val(item)); + DESTROY_WSTACK(wstack); + return make_flatmap(mp); + } - /* insert sort key/value pairs */ - while(is_list(item)) { + return res; +} - kv = tuple_val(CAR(list_val(item))); - - /* compare ks backwards - * idx represent word index to be written (hole position). - * We cannot copy the elements when searching since we might - * have an equal key. So we search for just the index first =( - * - * It is perhaps faster to move the values in the first pass. - * Check for uniqueness during insert phase and then have a - * second phace compacting the map if duplicates are found - * during insert. .. or do someother sort .. shell-sort perhaps. - */ +Eterm erts_hashmap_from_array(ErtsHeapFactory* factory, Eterm *leafs, Uint n, + int reject_dupkeys) { + Uint32 sw, hx; + Uint ix; + hxnode_t *hxns; + Eterm res; + + /* create tmp hx values and leaf ptrs */ + hxns = (hxnode_t *)erts_alloc(ERTS_ALC_T_TMP, n * sizeof(hxnode_t)); + + for (ix = 0; ix < n; ix++) { + hx = hashmap_make_hash(*leafs); + swizzle32(sw,hx); + hxns[ix].hx = sw; + hxns[ix].val = make_list(leafs); + hxns[ix].skip = 1; + hxns[ix].i = ix; + leafs += 2; + } - idx = size; + res = hashmap_from_unsorted_array(factory, hxns, n, reject_dupkeys); - while(idx > 0 && (c = CMP_TERM(kv[1],ks[idx-1])) < 0) { idx--; } + erts_free(ERTS_ALC_T_TMP, (void *) hxns); - if (c == 0) { - /* last compare was equal, - * i.e. we have to release memory - * and overwrite that key/value - */ - ks[idx-1] = kv[1]; - vs[idx-1] = kv[2]; - unused_size++; - } else { - Uint i = size; - while(i > idx) { - ks[i] = ks[i-1]; - vs[i] = vs[i-1]; - i--; + return res; +} + + +Eterm erts_hashmap_from_ks_and_vs_extra(Process *p, Eterm *ks, Eterm *vs, Uint n, + Eterm key, Eterm value) { + Uint32 sw, hx; + Uint i,sz; + hxnode_t *hxns; + ErtsHeapFactory factory; + Eterm *hp, res; + + sz = (key == THE_NON_VALUE) ? n : (n + 1); + ASSERT(sz > MAP_SMALL_MAP_LIMIT); + hp = HAlloc(p, 2 * sz); + + /* create tmp hx values and leaf ptrs */ + hxns = (hxnode_t *)erts_alloc(ERTS_ALC_T_TMP, sz * sizeof(hxnode_t)); + + for(i = 0; i < n; i++) { + hx = hashmap_make_hash(ks[i]); + swizzle32(sw,hx); + hxns[i].hx = sw; + hxns[i].val = CONS(hp, ks[i], vs[i]); hp += 2; + hxns[i].skip = 1; /* will be reassigned in from_array */ + hxns[i].i = i; + } + + if (key != THE_NON_VALUE) { + hx = hashmap_make_hash(key); + swizzle32(sw,hx); + hxns[i].hx = sw; + hxns[i].val = CONS(hp, key, value); hp += 2; + hxns[i].skip = 1; + hxns[i].i = i; + } + + factory.p = p; + res = hashmap_from_unsorted_array(&factory, hxns, sz, 0); + + erts_free(ERTS_ALC_T_TMP, (void *) hxns); + ERTS_VERIFY_UNUSED_TEMP_ALLOC(p); + + return res; +} + +static Eterm hashmap_from_unsorted_array(ErtsHeapFactory* factory, + hxnode_t *hxns, Uint n, + int reject_dupkeys) { + Uint jx = 0, ix = 0, lx, cx; + Eterm res; + + if (n == 0) { + Eterm *hp; + hp = erts_produce_heap(factory, 2, 0); + hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(0); + hp[1] = 0; + + return make_hashmap(hp); + } + + /* sort and compact array (remove non-unique entries) */ + qsort(hxns, n, sizeof(hxnode_t), (int (*)(const void *, const void *)) hxnodecmp); + + ix = 0, cx = 0; + while(ix < n - 1) { + if (hxns[ix].hx == hxns[ix+1].hx) { + + /* find region of equal hash values */ + jx = ix + 1; + while(jx < n && hxns[ix].hx == hxns[jx].hx) { jx++; } + /* find all correct keys from region + * (last in list but now hash sorted so we check highest id instead) */ + + /* resort with keys instead of hash value within region */ + + qsort(&hxns[ix], jx - ix, sizeof(hxnode_t), + (int (*)(const void *, const void *)) hxnodecmpkey); + + while(ix < jx) { + lx = ix; + while(ix < jx && EQ(CAR(list_val(hxns[ix].val)), CAR(list_val(hxns[lx].val)))) { + if (reject_dupkeys) + return THE_NON_VALUE; + + if (hxns[ix].i > hxns[lx].i) { + lx = ix; + } + ix++; } - ks[idx] = kv[1]; - vs[idx] = kv[2]; - size++; + hxns[cx].hx = hxns[lx].hx; + hxns[cx].val = hxns[lx].val; + cx++; } - item = CDR(list_val(item)); + ix = jx; + continue; } + if (ix > cx) { + hxns[cx].hx = hxns[ix].hx; + hxns[cx].val = hxns[ix].val; + } + cx++; + ix++; + } - if (unused_size) { - /* the key tuple is embedded in the heap - * write a bignum to clear it. - */ - /* release values as normal since they are on the top of the heap */ + if (ix < n) { + hxns[cx].hx = hxns[ix].hx; + hxns[cx].val = hxns[ix].val; + cx++; + } - ks[size] = make_pos_bignum_header(unused_size - 1); - HRelease(BIF_P, vs + size + unused_size, vs + size); - } + if (cx > 1) { + /* recursive decompose array */ + res = hashmap_from_sorted_unique_array(factory, hxns, cx, 0); + } else { + Eterm *hp; - *thp = make_arityval(size); - mp->size = size; - BIF_RET(res); + /* we only have one item, either because n was 1 or + * because we hade multiples of the same key. + * + * hash value has been swizzled, need to drag it down to get the + * correct slot. */ + + hp = erts_produce_heap(factory, HAMT_HEAD_BITMAP_SZ(1), 0); + hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(1 << ((hxns[0].hx >> 0x1c) & 0xf)); + hp[1] = 1; + hp[2] = hxns[0].val; + res = make_hashmap(hp); } -error: + return res; +} - BIF_ERROR(BIF_P, BADARG); +static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory* factory, + hxnode_t *hxns, Uint n, int lvl) { + Eterm res = NIL; + Uint i,ix,jx,elems; + Uint32 sw, hx; + Eterm val; + Eterm th[2]; + hxnode_t *tmp; + + ASSERT(lvl < 32); + ix = 0; + elems = 1; + while (ix < n - 1) { + if (hxns[ix].hx == hxns[ix+1].hx) { + jx = ix + 1; + while (jx < n && hxns[ix].hx == hxns[jx].hx) { jx++; } + tmp = (hxnode_t *)erts_alloc(ERTS_ALC_T_TMP, ((jx - ix)) * sizeof(hxnode_t)); + + for(i = 0; i < jx - ix; i++) { + val = hxns[i + ix].val; + hx = hashmap_restore_hash(th, lvl + 8, CAR(list_val(val))); + swizzle32(sw,hx); + tmp[i].hx = sw; + tmp[i].val = val; + tmp[i].i = i; + tmp[i].skip = 1; + } + + qsort(tmp, jx - ix, sizeof(hxnode_t), (int (*)(const void *, const void *)) hxnodecmp); + + hxns[ix].skip = jx - ix; + hxns[ix].val = hashmap_from_sorted_unique_array(factory, tmp, jx - ix, lvl + 8); + erts_free(ERTS_ALC_T_TMP, (void *) tmp); + ix = jx; + if (ix < n) { elems++; } + continue; + } + hxns[ix].skip = 1; + elems++; + ix++; + } + + res = hashmap_from_chunked_array(factory, hxns, elems, !lvl); + + ERTS_FACTORY_HOLE_CHECK(factory); + + return res; } -/* maps:is_key/2 - */ +#define HALLOC_EXTRA 200 +static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, + hxnode_t *hxns, Uint n, int is_root) { + Uint ix, d, dn, dc, slot, elems; + Uint32 v, vp, vn, hdr; + Uint bp, sz; + DECLARE_ESTACK(stack); + Eterm res = NIL, *hp = NULL, *nhp; -BIF_RETTYPE maps_is_key_2(BIF_ALIST_2) { - if (is_map(BIF_ARG_2)) { - Eterm *ks, key; - map_t *mp; - Uint n,i; + ASSERT(n > 1); - mp = (map_t*)map_val(BIF_ARG_2); - key = BIF_ARG_1; - n = map_get_size(mp); - ks = map_get_keys(mp); + /* push initial nodes on the stack, + * this is the starting depth */ - if (n == 0) - BIF_RET(am_false); + ix = 0; + d = 0; + vp = hxns[ix].hx; + v = hxns[ix + hxns[ix].skip].hx; - if (is_immed(key)) { - for( i = 0; i < n; i++) { - if (ks[i] == key) { - BIF_RET(am_true); - } + ASSERT(vp > v); + slot = maskval(vp,d); + + while(slot == maskval(v,d)) { + ESTACK_PUSH(stack, 1 << slot); + d++; + slot = maskval(vp,d); + } + + res = hxns[ix].val; + + if (hxns[ix].skip > 1) { + dc = 7; + /* build collision nodes */ + while (dc > d) { + hp = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA); + hp[0] = MAP_HEADER_HAMT_NODE_BITMAP(1 << maskval(vp,dc)); + hp[1] = res; + res = make_hashmap(hp); + dc--; + } + } + + ESTACK_PUSH(stack, res); + ESTACK_PUSH(stack, 1 << slot); + + /* all of the other nodes .. */ + elems = n - 2; /* remove first and last elements */ + while(elems--) { + hdr = ESTACK_POP(stack); + ix = ix + hxns[ix].skip; + + /* determine if node or subtree should be built by looking + * at the next value. */ + + vn = hxns[ix + hxns[ix].skip].hx; + dn = cdepth(v,vn); + ASSERT(v > vn); + + res = hxns[ix].val; + + if (hxns[ix].skip > 1) { + int wat = (d > dn) ? d : dn; + dc = 7; + /* build collision nodes */ + while (dc > wat) { + hp = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA); + hp[0] = MAP_HEADER_HAMT_NODE_BITMAP(1 << maskval(v,dc)); + hp[1] = res; + res = make_hashmap(hp); + dc--; } } - for( i = 0; i < n; i++) { - if (EQ(ks[i], key)) { - BIF_RET(am_true); + /* next depth is higher (implies collision) */ + if (d < dn) { + /* hdr is the popped one initially */ + while(d < dn) { + slot = maskval(v, d); + bp = 1 << slot; + ESTACK_PUSH(stack, hdr | bp); + d++; + hdr = 0; /* clear hdr for all other collisions */ } + + slot = maskval(v, d); + bp = 1 << slot; + /* no more collisions */ + ESTACK_PUSH(stack,res); + ESTACK_PUSH(stack,bp); + } else if (d == dn) { + /* no collisions at all */ + slot = maskval(v, d); + bp = 1 << slot; + ESTACK_PUSH(stack,res); + ESTACK_PUSH(stack,hdr | bp); + } else { + /* dn < n, we have a drop and we are done + * build nodes and subtree */ + while (dn != d) { + slot = maskval(v, d); + bp = 1 << slot; + /* OR bitposition before sz calculation to handle + * redundant collisions */ + hdr |= bp; + sz = hashmap_bitcount(hdr); + hp = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(sz), HALLOC_EXTRA); + nhp = hp; + *hp++ = (hdr == 0xffff) ? MAP_HEADER_HAMT_NODE_ARRAY : MAP_HEADER_HAMT_NODE_BITMAP(hdr); + *hp++ = res; sz--; + while (sz--) { *hp++ = ESTACK_POP(stack); } + ASSERT((hp - nhp) < 18); + res = make_hashmap(nhp); + + /* we need to pop the next hdr and push if we don't need it */ + + hdr = ESTACK_POP(stack); + d--; + } + ESTACK_PUSH(stack, res); + ESTACK_PUSH(stack, hdr); } - BIF_RET(am_false); + + vp = v; + v = vn; + d = dn; + ERTS_FACTORY_HOLE_CHECK(factory); + } + + /* v and vp are reused from above */ + dn = cdepth(vp,v); + ix = ix + hxns[ix].skip; + res = hxns[ix].val; + + if (hxns[ix].skip > 1) { + dc = 7; + /* build collision nodes */ + while (dc > dn) { + hp = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA); + hp[0] = MAP_HEADER_HAMT_NODE_BITMAP(1 << maskval(v,dc)); + hp[1] = res; + res = make_hashmap(hp); + dc--; + } + } + + hdr = ESTACK_POP(stack); + /* pop remaining subtree if any */ + while (dn) { + slot = maskval(v, dn); + bp = 1 << slot; + /* OR bitposition before sz calculation to handle + * redundant collisions */ + hdr |= bp; + sz = hashmap_bitcount(hdr); + hp = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(sz), HALLOC_EXTRA); + nhp = hp; + *hp++ = (hdr == 0xffff) ? MAP_HEADER_HAMT_NODE_ARRAY : MAP_HEADER_HAMT_NODE_BITMAP(hdr); + *hp++ = res; sz--; + + while (sz--) { *hp++ = ESTACK_POP(stack); } + res = make_hashmap(nhp); + hdr = ESTACK_POP(stack); + dn--; + } + + /* and finally the root .. */ + + slot = maskval(v, dn); + bp = 1 << slot; + hdr |= bp; + sz = hashmap_bitcount(hdr); + hp = erts_produce_heap(factory, sz + /* hdr + item */ (is_root ? 2 : 1), 0); + nhp = hp; + + if (is_root) { + *hp++ = (hdr == 0xffff) ? MAP_HEADER_HAMT_HEAD_ARRAY : MAP_HEADER_HAMT_HEAD_BITMAP(hdr); + *hp++ = n; + } else { + *hp++ = (hdr == 0xffff) ? MAP_HEADER_HAMT_NODE_ARRAY : MAP_HEADER_HAMT_NODE_BITMAP(hdr); + } + + *hp++ = res; sz--; + while (sz--) { *hp++ = ESTACK_POP(stack); } + + res = make_hashmap(nhp); + + ASSERT(ESTACK_COUNT(stack) == 0); + DESTROY_ESTACK(stack); + ERTS_FACTORY_HOLE_CHECK(factory); + return res; +} +#undef HALLOC_EXTRA + +static int hxnodecmpkey(hxnode_t *a, hxnode_t *b) { + return CMP_TERM(CAR(list_val(a->val)), CAR(list_val(b->val))); +} + +static int hxnodecmp(hxnode_t *a, hxnode_t *b) { + if (a->hx < b->hx) + return 1; + else if (a->hx == b->hx) + return 0; + else + return -1; +} + +/* maps:is_key/2 */ + +BIF_RETTYPE maps_is_key_2(BIF_ALIST_2) { + if (is_map(BIF_ARG_2)) { + BIF_RET(erts_maps_get(BIF_ARG_1, BIF_ARG_2) ? am_true : am_false); } BIF_ERROR(BIF_P, BADARG); } -/* maps:keys/1 - */ +/* maps:keys/1 */ BIF_RETTYPE maps_keys_1(BIF_ALIST_1) { - if (is_map(BIF_ARG_1)) { + if (is_flatmap(BIF_ARG_1)) { Eterm *hp, *ks, res = NIL; - map_t *mp; + flatmap_t *mp; Uint n; - mp = (map_t*)map_val(BIF_ARG_1); - n = map_get_size(mp); + mp = (flatmap_t*)flatmap_val(BIF_ARG_1); + n = flatmap_get_size(mp); if (n == 0) BIF_RET(res); hp = HAlloc(BIF_P, (2 * n)); - ks = map_get_keys(mp); + ks = flatmap_get_keys(mp); while(n--) { res = CONS(hp, ks[n], res); hp += 2; } BIF_RET(res); + } else if (is_hashmap(BIF_ARG_1)) { + BIF_RET(hashmap_keys(BIF_P, BIF_ARG_1)); } BIF_ERROR(BIF_P, BADARG); } -/* maps:merge/2 - */ +/* maps:merge/2 */ BIF_RETTYPE maps_merge_2(BIF_ALIST_2) { - if (is_map(BIF_ARG_1) && is_map(BIF_ARG_2)) { - Eterm *hp,*thp; - Eterm tup; - Eterm *ks,*vs,*ks1,*vs1,*ks2,*vs2; - map_t *mp1,*mp2,*mp_new; - Uint n1,n2,i1,i2,need,unused_size=0; - int c = 0; - - mp1 = (map_t*)map_val(BIF_ARG_1); - mp2 = (map_t*)map_val(BIF_ARG_2); - n1 = map_get_size(mp1); - n2 = map_get_size(mp2); - - need = MAP_HEADER_SIZE + 1 + 2*(n1 + n2); - - hp = HAlloc(BIF_P, need); - thp = hp; - tup = make_tuple(thp); - ks = hp + 1; hp += 1 + n1 + n2; - mp_new = (map_t*)hp; hp += MAP_HEADER_SIZE; - vs = hp; hp += n1 + n2; - - mp_new->thing_word = MAP_HEADER; - mp_new->size = 0; - mp_new->keys = tup; - - i1 = 0; i2 = 0; - ks1 = map_get_keys(mp1); - vs1 = map_get_values(mp1); - ks2 = map_get_keys(mp2); - vs2 = map_get_values(mp2); - - while(i1 < n1 && i2 < n2) { - c = CMP_TERM(ks1[i1],ks2[i2]); - if ( c == 0) { - /* use righthand side arguments map value, - * but advance both maps */ - *ks++ = ks2[i2]; - *vs++ = vs2[i2]; - i1++, i2++, unused_size++; - } else if ( c < 0) { - *ks++ = ks1[i1]; - *vs++ = vs1[i1]; - i1++; - } else { - *ks++ = ks2[i2]; - *vs++ = vs2[i2]; - i2++; - } + if (is_flatmap(BIF_ARG_1)) { + if (is_flatmap(BIF_ARG_2)) { + BIF_RET(flatmap_merge(BIF_P, BIF_ARG_1, BIF_ARG_2)); + } else if (is_hashmap(BIF_ARG_2)) { + /* Will always become a tree */ + BIF_RET(map_merge_mixed(BIF_P, BIF_ARG_1, BIF_ARG_2, 0)); } + } else if (is_hashmap(BIF_ARG_1)) { + if (is_hashmap(BIF_ARG_2)) { + BIF_RET(hashmap_merge(BIF_P, BIF_ARG_1, BIF_ARG_2)); + } else if (is_flatmap(BIF_ARG_2)) { + /* Will always become a tree */ + BIF_RET(map_merge_mixed(BIF_P, BIF_ARG_2, BIF_ARG_1, 1)); + } + } + BIF_ERROR(BIF_P, BADARG); +} + +static Eterm flatmap_merge(Process *p, Eterm nodeA, Eterm nodeB) { + Eterm *hp,*thp; + Eterm tup; + Eterm *ks,*vs,*ks1,*vs1,*ks2,*vs2; + flatmap_t *mp1,*mp2,*mp_new; + Uint n,n1,n2,i1,i2,need,unused_size=0; + int c = 0; - /* copy remaining */ - while (i1 < n1) { + mp1 = (flatmap_t*)flatmap_val(nodeA); + mp2 = (flatmap_t*)flatmap_val(nodeB); + n1 = flatmap_get_size(mp1); + n2 = flatmap_get_size(mp2); + + need = MAP_HEADER_SIZE + 1 + 2*(n1 + n2); + + hp = HAlloc(p, need); + thp = hp; + tup = make_tuple(thp); + ks = hp + 1; hp += 1 + n1 + n2; + mp_new = (flatmap_t*)hp; hp += MAP_HEADER_SIZE; + vs = hp; hp += n1 + n2; + + mp_new->thing_word = MAP_HEADER; + mp_new->size = 0; + mp_new->keys = tup; + + i1 = 0; i2 = 0; + ks1 = flatmap_get_keys(mp1); + vs1 = flatmap_get_values(mp1); + ks2 = flatmap_get_keys(mp2); + vs2 = flatmap_get_values(mp2); + + while(i1 < n1 && i2 < n2) { + c = CMP_TERM(ks1[i1],ks2[i2]); + if (c == 0) { + /* use righthand side arguments map value, + * but advance both maps */ + *ks++ = ks2[i2]; + *vs++ = vs2[i2]; + i1++, i2++, unused_size++; + } else if (c < 0) { *ks++ = ks1[i1]; *vs++ = vs1[i1]; i1++; - } - - while (i2 < n2) { + } else { *ks++ = ks2[i2]; *vs++ = vs2[i2]; i2++; } + } - if (unused_size) { - /* the key tuple is embedded in the heap, write a bignum to clear it. - * - * release values as normal since they are on the top of the heap - * size = n1 + n1 - unused_size - */ + /* copy remaining */ + while (i1 < n1) { + *ks++ = ks1[i1]; + *vs++ = vs1[i1]; + i1++; + } - *ks = make_pos_bignum_header(unused_size - 1); - HRelease(BIF_P, vs + unused_size, vs); - } + while (i2 < n2) { + *ks++ = ks2[i2]; + *vs++ = vs2[i2]; + i2++; + } - mp_new->size = n1 + n2 - unused_size; - *thp = make_arityval(n1 + n2 - unused_size); + if (unused_size) { + /* the key tuple is embedded in the heap, write a bignum to clear it. + * + * release values as normal since they are on the top of the heap + * size = n1 + n1 - unused_size + */ - BIF_RET(make_map(mp_new)); + *ks = make_pos_bignum_header(unused_size - 1); + HRelease(p, vs + unused_size, vs); } - BIF_ERROR(BIF_P, BADARG); -} -/* maps:new/2 - */ -BIF_RETTYPE maps_new_0(BIF_ALIST_0) { - Eterm* hp; - Eterm tup; - map_t *mp; + n = n1 + n2 - unused_size; + *thp = make_arityval(n); - hp = HAlloc(BIF_P, (MAP_HEADER_SIZE + 1)); - tup = make_tuple(hp); - *hp++ = make_arityval(0); + /* Reshape map to a hashmap if the map exceeds the limit */ - mp = (map_t*)hp; - mp->thing_word = MAP_HEADER; - mp->size = 0; - mp->keys = tup; + if (n > MAP_SMALL_MAP_LIMIT) { + Uint32 hx,sw; + Uint i; + Eterm res; + hxnode_t *hxns; + ErtsHeapFactory factory; - BIF_RET(make_map(mp)); -} + ks = flatmap_get_keys(mp_new); + vs = flatmap_get_values(mp_new); -/* maps:put/3 - */ + hp = HAlloc(p, 2 * n); -Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map) { - Sint n,i; - Sint c = 0; - Eterm* hp, *shp; - Eterm *ks,*vs, res, tup; - map_t *mp = (map_t*)map_val(map); + hxns = (hxnode_t *)erts_alloc(ERTS_ALC_T_TMP,n * sizeof(hxnode_t)); - n = map_get_size(mp); + for (i = 0; i < n; i++) { + hx = hashmap_make_hash(ks[i]); + swizzle32(sw,hx); + hxns[i].hx = sw; + hxns[i].val = CONS(hp, ks[i], vs[i]); hp += 2; + hxns[i].skip = 1; + hxns[i].i = i; + } - if (n == 0) { - hp = HAlloc(p, MAP_HEADER_SIZE + 1 + 2); - tup = make_tuple(hp); - *hp++ = make_arityval(1); - *hp++ = key; - res = make_map(hp); - *hp++ = MAP_HEADER; - *hp++ = 1; - *hp++ = tup; - *hp++ = value; + factory.p = p; + res = hashmap_from_unsorted_array(&factory, hxns, n, 0); + + erts_free(ERTS_ALC_T_TMP, (void *) hxns); + ERTS_VERIFY_UNUSED_TEMP_ALLOC(p); return res; } - ks = map_get_keys(mp); - vs = map_get_values(mp); + mp_new->size = n; - /* only allocate for values, - * assume key-tuple will be intact + return make_flatmap(mp_new); +} + +static Eterm map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args) { + Eterm *ks, *vs, *hp, res; + flatmap_t *mp; + Uint n, i; + hxnode_t *hxns; + Uint32 sw, hx; + ErtsHeapFactory factory; + + /* convert flat to tree */ + + ASSERT(is_flatmap(flat)); + ASSERT(is_hashmap(tree)); + + mp = (flatmap_t*)flatmap_val(flat); + n = flatmap_get_size(mp); + + ks = flatmap_get_keys(mp); + vs = flatmap_get_values(mp); + + hp = HAlloc(p, 2 * n); + + hxns = (hxnode_t *)erts_alloc(ERTS_ALC_T_TMP, n * sizeof(hxnode_t)); + + for (i = 0; i < n; i++) { + hx = hashmap_make_hash(ks[i]); + swizzle32(sw,hx); + hxns[i].hx = sw; + hxns[i].val = CONS(hp, ks[i], vs[i]); hp += 2; + hxns[i].skip = 1; + hxns[i].i = i; + } + + factory.p = p; + res = hashmap_from_unsorted_array(&factory, hxns, n, 0); + + erts_free(ERTS_ALC_T_TMP, (void *) hxns); + ERTS_VERIFY_UNUSED_TEMP_ALLOC(p); + + return swap_args ? hashmap_merge(p, tree, res) : hashmap_merge(p, res, tree); +} + +#define HALLOC_EXTRA 200 + +static Eterm hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB) { +#define PSTACK_TYPE struct HashmapMergePStackType + struct HashmapMergePStackType { + Eterm *srcA, *srcB; + Uint32 abm, bbm, rbm; /* node bitmaps */ + int keepA; + int ix; + Eterm array[16]; + }; + PSTACK_DECLARE(s, 4); + struct HashmapMergePStackType* sp = PSTACK_PUSH(s); + Eterm *hp, *nhp; + Eterm hdrA, hdrB; + Eterm th[2]; + Uint32 ahx, bhx; + Uint size; /* total key-value counter */ + int keepA = 0; + unsigned lvl = 0; + Eterm res = THE_NON_VALUE; + + /* + * Strategy: Do depth-first traversal of both trees (at the same time) + * and merge each pair of nodes. */ - hp = HAlloc(p, MAP_HEADER_SIZE + n); - shp = hp; /* save hp, used if optimistic update fails */ - res = make_map(hp); - *hp++ = MAP_HEADER; - *hp++ = n; - *hp++ = mp->keys; - - if (is_immed(key)) { - for( i = 0; i < n; i ++) { - if (ks[i] == key) { - *hp++ = value; - vs++; - c = 1; + { + hashmap_head_t* a = (hashmap_head_t*) hashmap_val(nodeA); + hashmap_head_t* b = (hashmap_head_t*) hashmap_val(nodeB); + size = a->size + b->size; + } + +recurse: + + if (primary_tag(nodeA) == TAG_PRIMARY_BOXED && + primary_tag(nodeB) == TAG_PRIMARY_LIST) { + /* Avoid implementing this combination by switching places */ + Eterm tmp = nodeA; + nodeA = nodeB; + nodeB = tmp; + keepA = !keepA; + } + + switch (primary_tag(nodeA)) { + case TAG_PRIMARY_LIST: { + sp->srcA = list_val(nodeA); + switch (primary_tag(nodeB)) { + case TAG_PRIMARY_LIST: { /* LEAF + LEAF */ + sp->srcB = list_val(nodeB); + + if (EQ(CAR(sp->srcA), CAR(sp->srcB))) { + --size; + res = keepA ? nodeA : nodeB; } else { - *hp++ = *vs++; + ahx = hashmap_restore_hash(th, lvl, CAR(sp->srcA)); + bhx = hashmap_restore_hash(th, lvl, CAR(sp->srcB)); + sp->abm = 1 << hashmap_index(ahx); + sp->bbm = 1 << hashmap_index(bhx); + + sp->srcA = &nodeA; + sp->srcB = &nodeB; } + break; } - } else { - for( i = 0; i < n; i ++) { - if (EQ(ks[i], key)) { - *hp++ = value; - vs++; - c = 1; - } else { - *hp++ = *vs++; + case TAG_PRIMARY_BOXED: { /* LEAF + NODE */ + sp->srcB = boxed_val(nodeB); + ASSERT(is_header(*sp->srcB)); + hdrB = *sp->srcB++; + + ahx = hashmap_restore_hash(th, lvl, CAR(sp->srcA)); + sp->abm = 1 << hashmap_index(ahx); + sp->srcA = &nodeA; + switch(hdrB & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: sp->srcB++; + case HAMT_SUBTAG_NODE_ARRAY: + sp->bbm = 0xffff; + break; + + case HAMT_SUBTAG_HEAD_BITMAP: sp->srcB++; + case HAMT_SUBTAG_NODE_BITMAP: + sp->bbm = MAP_HEADER_VAL(hdrB); + break; + + default: + erl_exit(1, "bad header tag %ld\r\n", *sp->srcB & _HEADER_MAP_SUBTAG_MASK); + break; } + break; + } + default: + erl_exit(1, "bad primary tag %ld\r\n", nodeB); } + break; } + case TAG_PRIMARY_BOXED: { /* NODE + NODE */ + sp->srcA = boxed_val(nodeA); + hdrA = *sp->srcA++; + ASSERT(is_header(hdrA)); + switch (hdrA & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: sp->srcA++; + case HAMT_SUBTAG_NODE_ARRAY: { + ASSERT(primary_tag(nodeB) == TAG_PRIMARY_BOXED); + sp->abm = 0xffff; + sp->srcB = boxed_val(nodeB); + hdrB = *sp->srcB++; + ASSERT(is_header(hdrB)); + switch (hdrB & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: sp->srcB++; + case HAMT_SUBTAG_NODE_ARRAY: + sp->bbm = 0xffff; + break; + case HAMT_SUBTAG_HEAD_BITMAP: sp->srcB++; + case HAMT_SUBTAG_NODE_BITMAP: + sp->bbm = MAP_HEADER_VAL(hdrB); + break; + default: + erl_exit(1, "bad header tag %ld\r\n", *sp->srcB & _HEADER_MAP_SUBTAG_MASK); + } + break; + } + case HAMT_SUBTAG_HEAD_BITMAP: sp->srcA++; + case HAMT_SUBTAG_NODE_BITMAP: { + ASSERT(primary_tag(nodeB) == TAG_PRIMARY_BOXED); + sp->abm = MAP_HEADER_VAL(hdrA); + sp->srcB = boxed_val(nodeB); + hdrB = *sp->srcB++; + ASSERT(is_header(hdrB)); + switch (hdrB & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: sp->srcB++; + case HAMT_SUBTAG_NODE_ARRAY: + sp->bbm = 0xffff; + break; + case HAMT_SUBTAG_HEAD_BITMAP: sp->srcB++; + case HAMT_SUBTAG_NODE_BITMAP: + sp->bbm = MAP_HEADER_VAL(hdrB); + break; - if (c) - return res; + default: + erl_exit(1, "bad header tag %ld\r\n", *sp->srcB & _HEADER_MAP_SUBTAG_MASK); + } + break; + } + default: + erl_exit(1, "bad primary tag %ld\r\n", nodeA); + } + break; + } + default: + erl_exit(1, "bad primary tag %ld\r\n", nodeA); + } - /* need to make a new tuple, - * use old hp since it needs to be recreated anyway. - */ - tup = make_tuple(shp); - *shp++ = make_arityval(n+1); + for (;;) { + if (is_value(res)) { /* We have a complete (sub-)tree or leaf */ + if (lvl == 0) + break; + + /* Pop from stack and continue build parent node */ + lvl--; + sp = PSTACK_POP(s); + sp->array[sp->ix++] = res; + res = THE_NON_VALUE; + if (sp->rbm) { + sp->srcA++; + sp->srcB++; + keepA = sp->keepA; + } + } else { /* Start build a node */ + sp->ix = 0; + sp->rbm = sp->abm | sp->bbm; + ASSERT(!(sp->rbm == 0 && lvl > 0)); + } - hp = HAlloc(p, 3 + n + 1); - res = make_map(hp); - *hp++ = MAP_HEADER; - *hp++ = n + 1; - *hp++ = tup; + while (sp->rbm) { + Uint32 next = sp->rbm & (sp->rbm-1); + Uint32 bit = sp->rbm ^ next; + sp->rbm = next; + if (sp->abm & bit) { + if (sp->bbm & bit) { + /* Bit clash. Push and resolve by recursive merge */ + if (sp->rbm) { + sp->keepA = keepA; + } + nodeA = *sp->srcA; + nodeB = *sp->srcB; + lvl++; + sp = PSTACK_PUSH(s); + goto recurse; + } else { + sp->array[sp->ix++] = *sp->srcA++; + } + } else { + ASSERT(sp->bbm & bit); + sp->array[sp->ix++] = *sp->srcB++; + } + } - ks = map_get_keys(mp); - vs = map_get_values(mp); + ASSERT(sp->ix == hashmap_bitcount(sp->abm | sp->bbm)); + if (lvl == 0) { + nhp = HAllocX(p, HAMT_HEAD_BITMAP_SZ(sp->ix), HALLOC_EXTRA); + hp = nhp; + *hp++ = (sp->ix == 16 ? MAP_HEADER_HAMT_HEAD_ARRAY + : MAP_HEADER_HAMT_HEAD_BITMAP(sp->abm | sp->bbm)); + *hp++ = size; + } else { + nhp = HAllocX(p, HAMT_NODE_BITMAP_SZ(sp->ix), HALLOC_EXTRA); + hp = nhp; + *hp++ = (sp->ix == 16 ? make_arityval(16) + : MAP_HEADER_HAMT_NODE_BITMAP(sp->abm | sp->bbm)); + } + memcpy(hp, sp->array, sp->ix * sizeof(Eterm)); + res = make_boxed(nhp); + } + PSTACK_DESTROY(s); + return res; +} - ASSERT(n >= 0); +static int hash_cmp(Uint32 ha, Uint32 hb) +{ + int i; + for (i=0; i<8; i++) { + int cmp = (int)(ha & 0xF) - (int)(hb & 0xF); + if (cmp) + return cmp; + ha >>= 4; + hb >>= 4; + } + return 0; +} - /* copy map in order */ - while (n && ((c = CMP_TERM(*ks, key)) < 0)) { - *shp++ = *ks++; - *hp++ = *vs++; - n--; +int hashmap_key_hash_cmp(Eterm* ap, Eterm* bp) +{ + Eterm th[2]; + unsigned lvl = 0; + + if (ap && bp) { + ASSERT(CMP_TERM(CAR(ap), CAR(bp)) != 0); + for (;;) { + Uint32 ha = hashmap_restore_hash(th, lvl, CAR(ap)); + Uint32 hb = hashmap_restore_hash(th, lvl, CAR(bp)); + int cmp = hash_cmp(ha, hb); + if (cmp) + return cmp; + lvl += 8; + } } + return ap ? -1 : 1; +} - *shp++ = key; - *hp++ = value; +/* maps:new/0 */ - ASSERT(n >= 0); +BIF_RETTYPE maps_new_0(BIF_ALIST_0) { + Eterm* hp; + Eterm tup; + flatmap_t *mp; - while(n--) { - *shp++ = *ks++; - *hp++ = *vs++; - } - /* we have one word remaining - * this will work out fine once we get the size word - * in the header. - */ - *shp = make_pos_bignum_header(0); - return res; + hp = HAlloc(BIF_P, (MAP_HEADER_SIZE + 1)); + tup = make_tuple(hp); + *hp++ = make_arityval(0); + + mp = (flatmap_t*)hp; + mp->thing_word = MAP_HEADER; + mp->size = 0; + mp->keys = tup; + + BIF_RET(make_flatmap(mp)); } +/* maps:put/3 */ + BIF_RETTYPE maps_put_3(BIF_ALIST_3) { if (is_map(BIF_ARG_3)) { BIF_RET(erts_maps_put(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3)); @@ -598,81 +1366,87 @@ BIF_RETTYPE maps_put_3(BIF_ALIST_3) { BIF_ERROR(BIF_P, BADARG); } -/* maps:remove/3 - */ +/* maps:remove/3 */ int erts_maps_remove(Process *p, Eterm key, Eterm map, Eterm *res) { - Sint n; - Uint need; - Eterm *hp_start; - Eterm *thp, *mhp; - Eterm *ks, *vs, tup; - map_t *mp = (map_t*)map_val(map); + Uint32 hx; + if (is_flatmap(map)) { + Sint n; + Uint need; + Eterm *hp_start; + Eterm *thp, *mhp; + Eterm *ks, *vs, tup; + flatmap_t *mp = (flatmap_t*)flatmap_val(map); + + n = flatmap_get_size(mp); + + if (n == 0) { + *res = map; + return 1; + } - n = map_get_size(mp); + ks = flatmap_get_keys(mp); + vs = flatmap_get_values(mp); - if (n == 0) { - *res = map; - return 1; - } + /* Assume key exists. + * Release allocated if it didn't. + * Allocate key tuple first. + */ - ks = map_get_keys(mp); - vs = map_get_values(mp); + need = n + 1 - 1 + 3 + n - 1; /* tuple - 1 + map - 1 */ + hp_start = HAlloc(p, need); + thp = hp_start; + mhp = thp + n; /* offset with tuple heap size */ - /* Assume key exists. - * Release allocated if it didn't. - * Allocate key tuple first. - */ + tup = make_tuple(thp); + *thp++ = make_arityval(n - 1); - need = n + 1 - 1 + 3 + n - 1; /* tuple - 1 + map - 1 */ - hp_start = HAlloc(p, need); - thp = hp_start; - mhp = thp + n; /* offset with tuple heap size */ + *res = make_flatmap(mhp); + *mhp++ = MAP_HEADER; + *mhp++ = n - 1; + *mhp++ = tup; - tup = make_tuple(thp); - *thp++ = make_arityval(n - 1); - - *res = make_map(mhp); - *mhp++ = MAP_HEADER; - *mhp++ = n - 1; - *mhp++ = tup; - - if (is_immed(key)) { - while (1) { - if (*ks == key) { - goto found_key; - } else if (--n) { - *mhp++ = *vs++; - *thp++ = *ks++; - } else - break; - } - } else { - while(1) { - if (EQ(*ks, key)) { - goto found_key; - } else if (--n) { - *mhp++ = *vs++; - *thp++ = *ks++; - } else - break; + if (is_immed(key)) { + while (1) { + if (*ks == key) { + goto found_key; + } else if (--n) { + *mhp++ = *vs++; + *thp++ = *ks++; + } else + break; + } + } else { + while(1) { + if (EQ(*ks, key)) { + goto found_key; + } else if (--n) { + *mhp++ = *vs++; + *thp++ = *ks++; + } else + break; + } } - } - /* Not found, remove allocated memory - * and return previous map. - */ - HRelease(p, hp_start + need, hp_start); + /* Not found, remove allocated memory + * and return previous map. + */ + HRelease(p, hp_start + need, hp_start); - *res = map; - return 1; + *res = map; + return 1; found_key: - /* Copy rest of keys and values */ - if (--n) { - sys_memcpy(mhp, vs+1, n*sizeof(Eterm)); - sys_memcpy(thp, ks+1, n*sizeof(Eterm)); + /* Copy rest of keys and values */ + if (--n) { + sys_memcpy(mhp, vs+1, n*sizeof(Eterm)); + sys_memcpy(thp, ks+1, n*sizeof(Eterm)); + } + return 1; } + ASSERT(is_hashmap(map)); + hx = hashmap_make_hash(key); + *res = hashmap_delete(p, hx, key, map); return 1; } @@ -686,21 +1460,20 @@ BIF_RETTYPE maps_remove_2(BIF_ALIST_2) { BIF_ERROR(BIF_P, BADARG); } -/* maps:update/3 - */ - int erts_maps_update(Process *p, Eterm key, Eterm value, Eterm map, Eterm *res) { + Uint32 hx; + if (is_flatmap(map)) { Sint n,i; Eterm* hp,*shp; Eterm *ks,*vs; - map_t *mp = (map_t*)map_val(map); + flatmap_t *mp = (flatmap_t*)flatmap_val(map); - if ((n = map_get_size(mp)) == 0) { + if ((n = flatmap_get_size(mp)) == 0) { return 0; } - ks = map_get_keys(mp); - vs = map_get_values(mp); + ks = flatmap_get_keys(mp); + vs = flatmap_get_values(mp); /* only allocate for values, * assume key-tuple will be intact @@ -738,10 +1511,147 @@ found_key: vs++; if (++i < n) sys_memcpy(hp, vs, (n - i)*sizeof(Eterm)); - *res = make_map(shp); + *res = make_flatmap(shp); + return 1; + } + + ASSERT(is_hashmap(map)); + hx = hashmap_make_hash(key); + *res = erts_hashmap_insert(p, hx, key, value, map, 1); + if (is_value(*res)) return 1; + + return 0; +} + +Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map) { + Uint32 hx; + Eterm res; + if (is_flatmap(map)) { + Sint n,i; + Sint c = 0; + Eterm* hp, *shp; + Eterm *ks, *vs, tup; + flatmap_t *mp = (flatmap_t*)flatmap_val(map); + + n = flatmap_get_size(mp); + + if (n == 0) { + hp = HAlloc(p, MAP_HEADER_SIZE + 1 + 2); + tup = make_tuple(hp); + *hp++ = make_arityval(1); + *hp++ = key; + res = make_flatmap(hp); + *hp++ = MAP_HEADER; + *hp++ = 1; + *hp++ = tup; + *hp++ = value; + + return res; + } + + ks = flatmap_get_keys(mp); + vs = flatmap_get_values(mp); + + /* only allocate for values, + * assume key-tuple will be intact + */ + + hp = HAlloc(p, MAP_HEADER_SIZE + n); + shp = hp; /* save hp, used if optimistic update fails */ + res = make_flatmap(hp); + *hp++ = MAP_HEADER; + *hp++ = n; + *hp++ = mp->keys; + + if (is_immed(key)) { + for( i = 0; i < n; i ++) { + if (ks[i] == key) { + *hp++ = value; + vs++; + c = 1; + } else { + *hp++ = *vs++; + } + } + } else { + for( i = 0; i < n; i ++) { + if (EQ(ks[i], key)) { + *hp++ = value; + vs++; + c = 1; + } else { + *hp++ = *vs++; + } + } + } + + if (c) + return res; + + /* the map will grow */ + + if (n >= MAP_SMALL_MAP_LIMIT) { + HRelease(p, shp + MAP_HEADER_SIZE + n, shp); + ks = flatmap_get_keys(mp); + vs = flatmap_get_values(mp); + + res = erts_hashmap_from_ks_and_vs_extra(p,ks,vs,n,key,value); + + return res; + } + + /* still a small map. need to make a new tuple, + * use old hp since it needs to be recreated anyway. */ + + tup = make_tuple(shp); + *shp++ = make_arityval(n+1); + + hp = HAlloc(p, 3 + n + 1); + res = make_flatmap(hp); + *hp++ = MAP_HEADER; + *hp++ = n + 1; + *hp++ = tup; + + ks = flatmap_get_keys(mp); + vs = flatmap_get_values(mp); + + ASSERT(n >= 0); + + /* copy map in order */ + while (n && ((c = CMP_TERM(*ks, key)) < 0)) { + *shp++ = *ks++; + *hp++ = *vs++; + n--; + } + + *shp++ = key; + *hp++ = value; + + ASSERT(n >= 0); + + while(n--) { + *shp++ = *ks++; + *hp++ = *vs++; + } + /* we have one word remaining + * this will work out fine once we get the size word + * in the header. + */ + *shp = make_pos_bignum_header(0); + return res; + } + ASSERT(is_hashmap(map)); + + hx = hashmap_make_hash(key); + res = erts_hashmap_insert(p, hx, key, value, map, 0); + ASSERT(is_hashmap(res)); + + return res; } +/* maps:update/3 */ + BIF_RETTYPE maps_update_3(BIF_ALIST_3) { if (is_map(BIF_ARG_3)) { Eterm res; @@ -753,38 +1663,845 @@ BIF_RETTYPE maps_update_3(BIF_ALIST_3) { } -/* maps:values/1 - */ +/* maps:values/1 */ BIF_RETTYPE maps_values_1(BIF_ALIST_1) { - if (is_map(BIF_ARG_1)) { + if (is_flatmap(BIF_ARG_1)) { Eterm *hp, *vs, res = NIL; - map_t *mp; + flatmap_t *mp; Uint n; - mp = (map_t*)map_val(BIF_ARG_1); - n = map_get_size(mp); + mp = (flatmap_t*)flatmap_val(BIF_ARG_1); + n = flatmap_get_size(mp); if (n == 0) BIF_RET(res); hp = HAlloc(BIF_P, (2 * n)); - vs = map_get_values(mp); + vs = flatmap_get_values(mp); while(n--) { res = CONS(hp, vs[n], res); hp += 2; } BIF_RET(res); + } else if (is_hashmap(BIF_ARG_1)) { + BIF_RET(hashmap_values(BIF_P, BIF_ARG_1)); } BIF_ERROR(BIF_P, BADARG); } -int erts_validate_and_sort_map(map_t* mp) +static Eterm hashmap_to_list(Process *p, Eterm node) { + DECLARE_WSTACK(stack); + Eterm *hp, *kv; + Eterm res = NIL; + + hp = HAlloc(p, hashmap_size(node) * (2 + 3)); + hashmap_iterator_init(&stack, node, 0); + while ((kv=hashmap_iterator_next(&stack)) != NULL) { + Eterm tup = TUPLE2(hp, CAR(kv), CDR(kv)); + hp += 3; + res = CONS(hp, tup, res); + hp += 2; + } + DESTROY_WSTACK(stack); + return res; +} + +void hashmap_iterator_init(ErtsWStack* s, Eterm node, int reverse) { + Eterm hdr = *hashmap_val(node); + Uint sz; + + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + sz = 16; + break; + case HAMT_SUBTAG_HEAD_BITMAP: + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + break; + default: + erl_exit(1, "bad header"); + } + + WSTACK_PUSH3((*s), (UWord)THE_NON_VALUE, /* end marker */ + (UWord)(!reverse ? 0 : sz+1), + (UWord)node); +} + +Eterm* hashmap_iterator_next(ErtsWStack* s) { + Eterm node, *ptr, hdr; + Uint32 sz; + Uint idx; + + for (;;) { + ASSERT(!WSTACK_ISEMPTY((*s))); + node = (Eterm) WSTACK_POP((*s)); + if (is_non_value(node)) { + return NULL; + } + idx = (Uint) WSTACK_POP((*s)); + for (;;) { + ASSERT(is_boxed(node)); + ptr = boxed_val(node); + hdr = *ptr; + ASSERT(is_header(hdr)); + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + ptr++; + case HAMT_SUBTAG_NODE_ARRAY: + sz = 16; + break; + case HAMT_SUBTAG_HEAD_BITMAP: + ptr++; + case HAMT_SUBTAG_NODE_BITMAP: + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + ASSERT(sz < 17); + break; + default: + erl_exit(1, "bad header"); + } + + idx++; + + if (idx <= sz) { + WSTACK_PUSH2((*s), (UWord)idx, (UWord)node); + + if (is_list(ptr[idx])) { + return list_val(ptr[idx]); + } + ASSERT(is_boxed(ptr[idx])); + node = ptr[idx]; + idx = 0; + } + else + break; /* and pop parent node */ + } + } +} + +Eterm* hashmap_iterator_prev(ErtsWStack* s) { + Eterm node, *ptr, hdr; + Uint32 sz; + Uint idx; + + for (;;) { + ASSERT(!WSTACK_ISEMPTY((*s))); + node = (Eterm) WSTACK_POP((*s)); + if (is_non_value(node)) { + return NULL; + } + idx = (Uint) WSTACK_POP((*s)); + for (;;) { + ASSERT(is_boxed(node)); + ptr = boxed_val(node); + hdr = *ptr; + ASSERT(is_header(hdr)); + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + ptr++; + case HAMT_SUBTAG_NODE_ARRAY: + sz = 16; + break; + case HAMT_SUBTAG_HEAD_BITMAP: + ptr++; + case HAMT_SUBTAG_NODE_BITMAP: + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + ASSERT(sz < 17); + break; + default: + erl_exit(1, "bad header"); + } + + if (idx > sz) + idx = sz; + else + idx--; + + if (idx >= 1) { + WSTACK_PUSH2((*s), (UWord)idx, (UWord)node); + + if (is_list(ptr[idx])) { + return list_val(ptr[idx]); + } + ASSERT(is_boxed(ptr[idx])); + node = ptr[idx]; + idx = 17; + } + else + break; /* and pop parent node */ + } + } +} + +const Eterm * +#if HALFWORD_HEAP +erts_hashmap_get_rel(Uint32 hx, Eterm key, Eterm node, Eterm *map_base) +#else +erts_hashmap_get(Uint32 hx, Eterm key, Eterm node) +#endif { - Eterm *ks = map_get_keys(mp); - Eterm *vs = map_get_values(mp); - Uint sz = map_get_size(mp); + Eterm *ptr, hdr; + Uint ix,slot, lvl = 0; + Uint32 hval,bp; + DeclareTmpHeapNoproc(th,2); + UseTmpHeapNoproc(2); + + for (;;) { + switch(primary_tag(node)) { + case TAG_PRIMARY_LIST: /* LEAF NODE [K|V] */ + ptr = list_val(node); + UnUseTmpHeapNoproc(2); + + if (eq_rel(CAR(ptr), map_base, key, NULL)) { + return &(CDR(ptr)); + } + return NULL; + case TAG_PRIMARY_BOXED: + ptr = boxed_val(node); + hdr = *ptr; + ASSERT(is_header(hdr)); + + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_NODE_ARRAY: + ix = hashmap_index(hx); + hx = hashmap_shift_hash(th,hx,lvl,key); + node = ptr[ix+1]; + break; + case HAMT_SUBTAG_HEAD_ARRAY: + ix = hashmap_index(hx); + hx = hashmap_shift_hash(th,hx,lvl,key); + node = ptr[ix+2]; + break; + case HAMT_SUBTAG_NODE_BITMAP: + hval = MAP_HEADER_VAL(hdr); + ix = hashmap_index(hx); + bp = 1 << ix; + slot = hashmap_bitcount(hval & (bp - 1)); + + /* occupied */ + if (bp & hval) { + hx = hashmap_shift_hash(th,hx,lvl,key); + node = ptr[slot+1]; + break; + } + /* not occupied */ + UnUseTmpHeapNoproc(2); + return NULL; + case HAMT_SUBTAG_HEAD_BITMAP: + hval = MAP_HEADER_VAL(hdr); + ix = hashmap_index(hx); + bp = 1 << ix; + slot = hashmap_bitcount(hval & (bp - 1)); + + /* occupied */ + if (bp & hval) { + hx = hashmap_shift_hash(th,hx,lvl,key); + node = ptr[slot+2]; + break; + } + /* not occupied */ + UnUseTmpHeapNoproc(2); + return NULL; + default: + erl_exit(1, "bad header tag %ld\r\n", hdr & _HEADER_MAP_SUBTAG_MASK); + break; + } + break; + default: + erl_exit(1, "bad primary tag %p\r\n", node); + break; + } + } + UnUseTmpHeapNoproc(2); + return NULL; +} + +Eterm erts_hashmap_insert(Process *p, Uint32 hx, Eterm key, Eterm value, + Eterm map, int is_update) { + Uint size, upsz; + Eterm *hp, res = THE_NON_VALUE; + DECLARE_ESTACK(stack); + if (erts_hashmap_insert_down(hx, key, map, &size, &upsz, &stack, is_update)) { + hp = HAlloc(p, size); + res = erts_hashmap_insert_up(hp, key, value, &upsz, &stack); + } + + DESTROY_ESTACK(stack); + ERTS_VERIFY_UNUSED_TEMP_ALLOC(p); + ERTS_HOLE_CHECK(p); + + return res; +} + + +int erts_hashmap_insert_down(Uint32 hx, Eterm key, Eterm node, Uint *sz, + Uint *update_size, ErtsEStack *sp, int is_update) { + Eterm *ptr; + Eterm hdr, ckey; + Eterm th[2]; + Uint32 ix, cix, bp, hval, chx; + Uint slot, lvl = 0, clvl; + Uint size = 0, n = 0; + + *update_size = 1; + + for (;;) { + switch(primary_tag(node)) { + case TAG_PRIMARY_LIST: /* LEAF NODE [K|V] */ + ptr = list_val(node); + ckey = CAR(ptr); + if (EQ(ckey, key)) { + *update_size = 0; + goto unroll; + } + if (is_update) { + return 0; + } + goto insert_subnodes; + case TAG_PRIMARY_BOXED: + ptr = boxed_val(node); + hdr = *ptr; + ASSERT(is_header(hdr)); + + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_NODE_ARRAY: + ix = hashmap_index(hx); + hx = hashmap_shift_hash(th,hx,lvl,key); + size += HAMT_NODE_ARRAY_SZ; + ESTACK_PUSH2(*sp, ix, node); + node = ptr[ix+1]; + break; + case HAMT_SUBTAG_HEAD_ARRAY: + ix = hashmap_index(hx); + hx = hashmap_shift_hash(th,hx,lvl,key); + size += HAMT_HEAD_ARRAY_SZ; + ESTACK_PUSH2(*sp, ix, node); + node = ptr[ix+2]; + break; + case HAMT_SUBTAG_NODE_BITMAP: + hval = MAP_HEADER_VAL(hdr); + ix = hashmap_index(hx); + bp = 1 << ix; + slot = hashmap_bitcount(hval & (bp - 1)); + n = hashmap_bitcount(hval); + + ESTACK_PUSH(*sp, n); + ESTACK_PUSH3(*sp, bp, slot, node); + + /* occupied */ + if (bp & hval) { + hx = hashmap_shift_hash(th,hx,lvl,key); + node = ptr[slot+1]; + ASSERT(HAMT_NODE_BITMAP_SZ(n) <= 17); + size += HAMT_NODE_BITMAP_SZ(n); + break; + } + /* not occupied */ + if (is_update) { + return 0; + } + size += HAMT_NODE_BITMAP_SZ(n+1); + goto unroll; + case HAMT_SUBTAG_HEAD_BITMAP: + hval = MAP_HEADER_VAL(hdr); + ix = hashmap_index(hx); + bp = 1 << ix; + slot = hashmap_bitcount(hval & (bp - 1)); + n = hashmap_bitcount(hval); + + ESTACK_PUSH(*sp, n); + ESTACK_PUSH3(*sp, bp, slot, node); + + /* occupied */ + if (bp & hval) { + hx = hashmap_shift_hash(th,hx,lvl,key); + node = ptr[slot+2]; + ASSERT(HAMT_HEAD_BITMAP_SZ(n) <= 18); + size += HAMT_HEAD_BITMAP_SZ(n); + break; + } + /* not occupied */ + if (is_update) { + return 0; + } + size += HAMT_HEAD_BITMAP_SZ(n+1); + goto unroll; + default: + erl_exit(1, "bad header tag %ld\r\n", hdr & _HEADER_MAP_SUBTAG_MASK); + break; + } + break; + default: + erl_exit(1, "bad primary tag %p\r\n", node); + break; + } + } +insert_subnodes: + clvl = lvl; + chx = hashmap_restore_hash(th,clvl,ckey); + size += HAMT_NODE_BITMAP_SZ(2); + ix = hashmap_index(hx); + cix = hashmap_index(chx); + + while (cix == ix) { + ESTACK_PUSH(*sp, 0); + ESTACK_PUSH3(*sp, 1 << ix, 0, MAP_HEADER_HAMT_NODE_BITMAP(0)); + size += HAMT_NODE_BITMAP_SZ(1); + hx = hashmap_shift_hash(th,hx,lvl,key); + chx = hashmap_shift_hash(th,chx,clvl,ckey); + ix = hashmap_index(hx); + cix = hashmap_index(chx); + } + ESTACK_PUSH3(*sp, cix, ix, node); + +unroll: + *sz = size + /* res cons */ 2; + return 1; +} + +Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value, + Uint *update_size, ErtsEStack *sp) { + Eterm node, *ptr, hdr; + Eterm res; + Eterm *nhp = NULL; + Uint32 ix, cix, bp, hval; + Uint slot, n; + /* Needed for halfword */ + DeclareTmpHeapNoproc(fake,1); + UseTmpHeapNoproc(1); + + res = CONS(hp, key, value); hp += 2; + + do { + node = ESTACK_POP(*sp); + switch(primary_tag(node)) { + case TAG_PRIMARY_LIST: + ix = (Uint32) ESTACK_POP(*sp); + cix = (Uint32) ESTACK_POP(*sp); + + nhp = hp; + *hp++ = MAP_HEADER_HAMT_NODE_BITMAP((1 << ix) | (1 << cix)); + if (ix < cix) { + *hp++ = res; + *hp++ = node; + } else { + *hp++ = node; + *hp++ = res; + } + res = make_hashmap(nhp); + break; + case TAG_PRIMARY_HEADER: + /* subnodes, fake it */ + *fake = node; + node = make_boxed(fake); + case TAG_PRIMARY_BOXED: + ptr = boxed_val(node); + hdr = *ptr; + ASSERT(is_header(hdr)); + + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_NODE_ARRAY: + slot = (Uint) ESTACK_POP(*sp); + nhp = hp; + n = HAMT_NODE_ARRAY_SZ; + while(n--) { *hp++ = *ptr++; } + nhp[slot+1] = res; + res = make_hashmap(nhp); + break; + case HAMT_SUBTAG_HEAD_ARRAY: + slot = (Uint) ESTACK_POP(*sp); + nhp = hp; + n = HAMT_HEAD_ARRAY_SZ - 2; + *hp++ = MAP_HEADER_HAMT_HEAD_ARRAY; ptr++; + *hp++ = (*ptr++) + *update_size; + while(n--) { *hp++ = *ptr++; } + nhp[slot+2] = res; + res = make_hashmap(nhp); + break; + case HAMT_SUBTAG_NODE_BITMAP: + slot = (Uint) ESTACK_POP(*sp); + bp = (Uint32) ESTACK_POP(*sp); + n = (Uint32) ESTACK_POP(*sp); + hval = MAP_HEADER_VAL(hdr); + nhp = hp; + *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(hval | bp); ptr++; + + n -= slot; + while(slot--) { *hp++ = *ptr++; } + *hp++ = res; + if (hval & bp) { ptr++; n--; } + while(n--) { *hp++ = *ptr++; } + + if ((hval | bp) == 0xffff) { + *nhp = make_arityval(16); + } + res = make_hashmap(nhp); + break; + case HAMT_SUBTAG_HEAD_BITMAP: + slot = (Uint) ESTACK_POP(*sp); + bp = (Uint32) ESTACK_POP(*sp); + n = (Uint32) ESTACK_POP(*sp); + hval = MAP_HEADER_VAL(hdr); + nhp = hp; + *hp++ = MAP_HEADER_HAMT_HEAD_BITMAP(hval | bp); ptr++; + *hp++ = (*ptr++) + *update_size; + + n -= slot; + while(slot--) { *hp++ = *ptr++; } + *hp++ = res; + if (hval & bp) { ptr++; n--; } + while(n--) { *hp++ = *ptr++; } + + if ((hval | bp) == 0xffff) { + *nhp = MAP_HEADER_HAMT_HEAD_ARRAY; + } + res = make_hashmap(nhp); + break; + default: + erl_exit(1, "bad header tag %x\r\n", hdr & _HEADER_MAP_SUBTAG_MASK); + break; + } + break; + default: + erl_exit(1, "bad primary tag %x\r\n", primary_tag(node)); + break; + } + + } while(!ESTACK_ISEMPTY(*sp)); + + UnUseTmpHeapNoproc(1); + return res; +} + +static Eterm hashmap_keys(Process* p, Eterm node) { + DECLARE_WSTACK(stack); + hashmap_head_t* root; + Eterm *hp, *kv; + Eterm res = NIL; + + root = (hashmap_head_t*) boxed_val(node); + hp = HAlloc(p, root->size * 2); + hashmap_iterator_init(&stack, node, 0); + while ((kv=hashmap_iterator_next(&stack)) != NULL) { + res = CONS(hp, CAR(kv), res); + hp += 2; + } + DESTROY_WSTACK(stack); + return res; +} + +static Eterm hashmap_values(Process* p, Eterm node) { + DECLARE_WSTACK(stack); + hashmap_head_t* root; + Eterm *hp, *kv; + Eterm res = NIL; + + root = (hashmap_head_t*) boxed_val(node); + hp = HAlloc(p, root->size * 2); + hashmap_iterator_init(&stack, node, 0); + while ((kv=hashmap_iterator_next(&stack)) != NULL) { + res = CONS(hp, CDR(kv), res); + hp += 2; + } + DESTROY_WSTACK(stack); + return res; +} + +static Eterm hashmap_delete(Process *p, Uint32 hx, Eterm key, Eterm map) { + Eterm *hp = NULL, *nhp = NULL, *hp_end = NULL; + Eterm th[2]; + Eterm *ptr; + Eterm hdr, res = map, node = map; + Uint32 ix, bp, hval; + Uint slot, lvl = 0; + Uint size = 0, n = 0; + DECLARE_ESTACK(stack); + + for (;;) { + switch(primary_tag(node)) { + case TAG_PRIMARY_LIST: + if (EQ(CAR(list_val(node)), key)) { + goto unroll; + } + goto not_found; + case TAG_PRIMARY_BOXED: + ptr = boxed_val(node); + hdr = *ptr; + ASSERT(is_header(hdr)); + + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_NODE_ARRAY: + ix = hashmap_index(hx); + hx = hashmap_shift_hash(th,hx,lvl,key); + size += HAMT_NODE_ARRAY_SZ; + ESTACK_PUSH2(stack, ix, node); + node = ptr[ix+1]; + break; + case HAMT_SUBTAG_HEAD_ARRAY: + ix = hashmap_index(hx); + hx = hashmap_shift_hash(th,hx,lvl,key); + size += HAMT_HEAD_ARRAY_SZ; + ESTACK_PUSH2(stack, ix, node); + node = ptr[ix+2]; + break; + case HAMT_SUBTAG_NODE_BITMAP: + hval = MAP_HEADER_VAL(hdr); + ix = hashmap_index(hx); + bp = 1 << ix; + slot = hashmap_bitcount(hval & (bp - 1)); + n = hashmap_bitcount(hval); + + ESTACK_PUSH(stack, n); + ESTACK_PUSH3(stack, bp, slot, node); + + /* occupied */ + if (bp & hval) { + hx = hashmap_shift_hash(th,hx,lvl,key); + node = ptr[slot+1]; + ASSERT(HAMT_NODE_BITMAP_SZ(n) <= 17); + size += HAMT_NODE_BITMAP_SZ(n); + break; + } + /* not occupied */ + goto not_found; + case HAMT_SUBTAG_HEAD_BITMAP: + hval = MAP_HEADER_VAL(hdr); + ix = hashmap_index(hx); + bp = 1 << ix; + slot = hashmap_bitcount(hval & (bp - 1)); + n = hashmap_bitcount(hval); + + ESTACK_PUSH(stack, n); + ESTACK_PUSH3(stack, bp, slot, node); + + /* occupied */ + if (bp & hval) { + hx = hashmap_shift_hash(th,hx,lvl,key); + node = ptr[slot+2]; + ASSERT(HAMT_HEAD_BITMAP_SZ(n) <= 18); + size += HAMT_HEAD_BITMAP_SZ(n); + break; + } + /* not occupied */ + goto not_found; + default: + erl_exit(1, "bad header tag %ld\r\n", hdr & _HEADER_MAP_SUBTAG_MASK); + break; + } + break; + default: + erl_exit(1, "bad primary tag %p\r\n", node); + break; + } + } + +unroll: + /* the size is bounded and atleast one less than the previous size */ + size -= 1; + n = hashmap_size(map) - 1; + + if (n <= MAP_SMALL_MAP_LIMIT) { + DECLARE_WSTACK(wstack); + Eterm *kv, *ks, *vs; + flatmap_t *mp; + Eterm keys; + + DESTROY_ESTACK(stack); + + /* build flat structure */ + hp = HAlloc(p, 3 + 1 + (2 * n)); + keys = make_tuple(hp); + *hp++ = make_arityval(n); + ks = hp; + hp += n; + mp = (flatmap_t*)hp; + hp += MAP_HEADER_SIZE; + vs = hp; + + mp->thing_word = MAP_HEADER; + mp->size = n; + mp->keys = keys; + + hashmap_iterator_init(&wstack, map, 0); + + while ((kv=hashmap_iterator_next(&wstack)) != NULL) { + if (EQ(CAR(kv),key)) + continue; + *ks++ = CAR(kv); + *vs++ = CDR(kv); + } + + /* it cannot have multiple keys */ + erts_validate_and_sort_flatmap(mp); + + DESTROY_WSTACK(wstack); + return make_flatmap(mp); + } + + hp = HAlloc(p, size); + hp_end = hp + size; + res = THE_NON_VALUE; + + do { + node = ESTACK_POP(stack); + + /* all nodes are things */ + ptr = boxed_val(node); + hdr = *ptr; + ASSERT(is_header(hdr)); + + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_NODE_ARRAY: + ix = (Uint) ESTACK_POP(stack); + nhp = hp; + if (res == THE_NON_VALUE) { + *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(0xffff ^ (1 << ix)); ptr++; + n = 16; + n -= ix; + while(ix--) { *hp++ = *ptr++; } + ptr++; n--; + while(n--) { *hp++ = *ptr++; } + res = make_hashmap(nhp); + } else { + n = HAMT_NODE_ARRAY_SZ; + while(n--) { *hp++ = *ptr++; } + nhp[ix+1] = res; + res = make_hashmap(nhp); + } + break; + case HAMT_SUBTAG_HEAD_ARRAY: + ix = (Uint) ESTACK_POP(stack); + nhp = hp; + if (res == THE_NON_VALUE) { + n = 16; + n -= ix; + *hp++ = MAP_HEADER_HAMT_HEAD_BITMAP(0xffff ^ (1 << ix)); ptr++; + *hp++ = (*ptr++) - 1; + while(ix--) { *hp++ = *ptr++; } + ptr++; n--; + while(n--) { *hp++ = *ptr++; } + res = make_hashmap(nhp); + } else { + n = 16; + *hp++ = MAP_HEADER_HAMT_HEAD_ARRAY; ptr++; + *hp++ = (*ptr++) - 1; + while(n--) { *hp++ = *ptr++; } + nhp[ix+2] = res; + res = make_hashmap(nhp); + } + break; + case HAMT_SUBTAG_NODE_BITMAP: + slot = (Uint) ESTACK_POP(stack); + bp = (Uint32) ESTACK_POP(stack); + n = (Uint32) ESTACK_POP(stack); + nhp = hp; + + /* bitmap change matrix + * res | none leaf bitmap + * ---------------------------- + * n=1 | remove remove keep + * n=2 | other keep keep + * n>2 | shrink keep keep + * + * other: (remember, n is 2) + * shrink if the other bitmap value is a bitmap node + * remove if the other bitmap value is a leaf + * + * remove: + * this bitmap node is removed, res is moved up in tree (could be none) + * this is a special case of shrink + * + * keep: + * the current path index is still used down in the tree, need to keep it + * copy as usual with the updated res + * + * shrink: + * the current path index is no longer used down in the tree, remove it (shrink) + */ + if (res == THE_NON_VALUE) { + if (n == 1) { + break; + } else if (n == 2) { + if (slot == 0) { + ix = 2; /* off by one 'cause hdr */ + } else { + ix = 1; /* off by one 'cause hdr */ + } + if (primary_tag(ptr[ix]) == TAG_PRIMARY_LIST) { + res = ptr[ix]; + } else { + hval = MAP_HEADER_VAL(hdr); + *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(hval ^ bp); + *hp++ = ptr[ix]; + res = make_hashmap(nhp); + } + } else { + /* n > 2 */ + hval = MAP_HEADER_VAL(hdr); + *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(hval ^ bp); ptr++; + n -= slot; + while(slot--) { *hp++ = *ptr++; } + ptr++; n--; + while(n--) { *hp++ = *ptr++; } + res = make_hashmap(nhp); + } + } else if (primary_tag(res) == TAG_PRIMARY_LIST && n == 1) { + break; + } else { + /* res is bitmap or leaf && n > 1, keep */ + n -= slot; + *hp++ = *ptr++; + while(slot--) { *hp++ = *ptr++; } + *hp++ = res; + ptr++; n--; + while(n--) { *hp++ = *ptr++; } + res = make_hashmap(nhp); + } + break; + case HAMT_SUBTAG_HEAD_BITMAP: + slot = (Uint) ESTACK_POP(stack); + bp = (Uint32) ESTACK_POP(stack); + n = (Uint32) ESTACK_POP(stack); + nhp = hp; + + if (res != THE_NON_VALUE) { + *hp++ = *ptr++; + *hp++ = (*ptr++) - 1; + n -= slot; + while(slot--) { *hp++ = *ptr++; } + *hp++ = res; + ptr++; n--; + while(n--) { *hp++ = *ptr++; } + } else { + hval = MAP_HEADER_VAL(hdr); + *hp++ = MAP_HEADER_HAMT_HEAD_BITMAP(hval ^ bp); ptr++; + *hp++ = (*ptr++) - 1; + n -= slot; + while(slot--) { *hp++ = *ptr++; } + ptr++; n--; + while(n--) { *hp++ = *ptr++; } + } + res = make_hashmap(nhp); + break; + default: + erl_exit(1, "bad header tag %x\r\n", hdr & _HEADER_MAP_SUBTAG_MASK); + break; + } + } while(!ESTACK_ISEMPTY(stack)); + HRelease(p, hp_end, hp); +not_found: + DESTROY_ESTACK(stack); + ERTS_VERIFY_UNUSED_TEMP_ALLOC(p); + ERTS_HOLE_CHECK(p); + return res; +} + + +int erts_validate_and_sort_flatmap(flatmap_t* mp) +{ + Eterm *ks = flatmap_get_keys(mp); + Eterm *vs = flatmap_get_values(mp); + Uint sz = flatmap_get_size(mp); Uint ix,jx; Eterm tmp; int c; @@ -811,6 +2528,50 @@ int erts_validate_and_sort_map(map_t* mp) return 1; } +/* Really rough estimate of sqrt(x) + * Guaranteed not to be less than sqrt(x) + */ +static int int_sqrt_ceiling(Uint x) +{ + int n; + + if (x <= 2) + return x; + + n = erts_fit_in_bits_uint(x-1); + if (n & 1) { + /* Calc: sqrt(2^n) = 2^(n/2) * sqrt(2) ~= 2^(n/2) * 3 / 2 */ + return (1 << (n/2 - 1)) * 3; + } + else { + /* Calc: sqrt(2^n) = 2^(n/2) */ + return 1 << (n / 2); + } +} + +Uint hashmap_over_estimated_heap_size(Uint n) +{ + /* n is nr of key-value pairs. + Average nr of nodes is about n/3. + Standard deviation is about sqrt(n)/3. + Assuming normal probability distribution, + we overestimate nr of nodes by 14 std.devs, which gives a probability + for overrun of 1.0e-49 (same magnitude as a git SHA1 collision). + */ + Uint nodes = (n + int_sqrt_ceiling(n)*14) / 3; + return (n*2 + /* leaf cons cells */ + n + /* leaf list terms */ + nodes*2); /* headers + parent refs */ +} + + +BIF_RETTYPE erts_debug_map_info_1(BIF_ALIST_1) { + if (is_hashmap(BIF_ARG_1)) { + BIF_RET(hashmap_info(BIF_P,BIF_ARG_1)); + } + BIF_ERROR(BIF_P, BADARG); +} + /* * erts_internal:map_to_tuple_keys/1 * @@ -818,9 +2579,233 @@ int erts_validate_and_sort_map(map_t* mp) */ BIF_RETTYPE erts_internal_map_to_tuple_keys_1(BIF_ALIST_1) { - if (is_map(BIF_ARG_1)) { - map_t *mp = (map_t*)map_val(BIF_ARG_1); + if (is_flatmap(BIF_ARG_1)) { + flatmap_t *mp = (flatmap_t*)flatmap_val(BIF_ARG_1); BIF_RET(mp->keys); } BIF_ERROR(BIF_P, BADARG); } + +/* + * erts_internal:map_type/1 + * + * Used in erts_debug:size/1 + */ + +BIF_RETTYPE erts_internal_map_type_1(BIF_ALIST_1) { + DECL_AM(hashmap); + DECL_AM(hashmap_node); + DECL_AM(flatmap); + if (is_flatmap(BIF_ARG_1)) { + BIF_RET(AM_flatmap); + } else if (is_hashmap(BIF_ARG_1)) { + Eterm hdr = *(boxed_val(BIF_ARG_1)); + ASSERT(is_header(hdr)); + switch (hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + case HAMT_SUBTAG_HEAD_BITMAP: + BIF_RET(AM_hashmap); + case HAMT_SUBTAG_NODE_ARRAY: + case HAMT_SUBTAG_NODE_BITMAP: + BIF_RET(AM_hashmap_node); + default: + erl_exit(1, "bad header"); + } + } + BIF_ERROR(BIF_P, BADARG); +} + +/* + * erts_internal:map_hashmap_children/1 + * + * Used in erts_debug:size/1 + */ + +BIF_RETTYPE erts_internal_map_hashmap_children_1(BIF_ALIST_1) { + if (is_hashmap(BIF_ARG_1)) { + Eterm node = BIF_ARG_1; + Eterm *ptr, hdr, *hp, res = NIL; + Uint sz = 0; + ptr = boxed_val(node); + hdr = *ptr; + + ASSERT(is_header(hdr)); + + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_NODE_ARRAY: + sz = 16; + ptr += 1; + break; + case HAMT_SUBTAG_NODE_BITMAP: + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + ptr += 1; + break; + case HAMT_SUBTAG_HEAD_BITMAP: + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + ptr += 2; + break; + case HAMT_SUBTAG_HEAD_ARRAY: + sz = 16; + ptr += 2; + break; + default: + erl_exit(1, "bad header\r\n"); + break; + } + ASSERT(sz < 17); + hp = HAlloc(BIF_P, 2*sz); + while(sz--) { res = CONS(hp, *ptr++, res); hp += 2; } + BIF_RET(res); + } + BIF_ERROR(BIF_P, BADARG); +} + + +static Eterm hashmap_info(Process *p, Eterm node) { + Eterm *hp; + Eterm res = NIL, info = NIL; + Eterm *ptr, tup, hdr; + Uint sz; + DECL_AM(depth); + DECL_AM(leafs); + DECL_AM(bitmaps); + DECL_AM(arrays); + Uint nleaf=0, nbitmap=0, narray=0; + Uint bitmap_usage[16], leaf_usage[16]; + Uint lvl = 0, clvl; + DECLARE_ESTACK(stack); + + for (sz = 0; sz < 16; sz++) { + bitmap_usage[sz] = 0; + leaf_usage[sz] = 0; + } + + ptr = boxed_val(node); + ESTACK_PUSH(stack, 0); + ESTACK_PUSH(stack, node); + do { + node = ESTACK_POP(stack); + clvl = ESTACK_POP(stack); + if (lvl < clvl) + lvl = clvl; + switch(primary_tag(node)) { + case TAG_PRIMARY_LIST: + nleaf++; + leaf_usage[clvl] += 1; + break; + case TAG_PRIMARY_BOXED: + ptr = boxed_val(node); + hdr = *ptr; + ASSERT(is_header(hdr)); + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_NODE_ARRAY: + narray++; + sz = 16; + while(sz--) { + ESTACK_PUSH(stack, clvl + 1); + ESTACK_PUSH(stack, ptr[sz+1]); + } + break; + case HAMT_SUBTAG_NODE_BITMAP: + nbitmap++; + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + ASSERT(sz < 17); + bitmap_usage[sz-1] += 1; + while(sz--) { + ESTACK_PUSH(stack, clvl + 1); + ESTACK_PUSH(stack, ptr[sz+1]); + } + break; + case HAMT_SUBTAG_HEAD_BITMAP: + nbitmap++; + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + bitmap_usage[sz-1] += 1; + while(sz--) { + ESTACK_PUSH(stack, clvl + 1); + ESTACK_PUSH(stack, ptr[sz+2]); + } + break; + case HAMT_SUBTAG_HEAD_ARRAY: + narray++; + sz = 16; + while(sz--) { + ESTACK_PUSH(stack, clvl + 1); + ESTACK_PUSH(stack, ptr[sz+2]); + } + break; + default: + erl_exit(1, "bad header\r\n"); + break; + } + } + } while(!ESTACK_ISEMPTY(stack)); + + + /* size */ + sz = 0; + hashmap_bld_tuple_uint(NULL,&sz,16,leaf_usage); + hashmap_bld_tuple_uint(NULL,&sz,16,bitmap_usage); + + /* alloc */ + hp = HAlloc(p, 2+3 + 3*(2+4) + sz); + + info = hashmap_bld_tuple_uint(&hp,NULL,16,leaf_usage); + tup = TUPLE3(hp, AM_leafs, make_small(nleaf),info); hp += 4; + res = CONS(hp, tup, res); hp += 2; + + info = hashmap_bld_tuple_uint(&hp,NULL,16,bitmap_usage); + tup = TUPLE3(hp, AM_bitmaps, make_small(nbitmap), info); hp += 4; + res = CONS(hp, tup, res); hp += 2; + + tup = TUPLE3(hp, AM_arrays, make_small(narray),NIL); hp += 4; + res = CONS(hp, tup, res); hp += 2; + + tup = TUPLE2(hp, AM_depth, make_small(lvl)); hp += 3; + res = CONS(hp, tup, res); hp += 2; + + DESTROY_ESTACK(stack); + ERTS_HOLE_CHECK(p); + return res; +} + +static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[]) { + Eterm res = THE_NON_VALUE; + Eterm *ts = (Eterm *)erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm)); + Uint i; + + for (i = 0; i < n; i++) { + ts[i] = erts_bld_uint(hpp, szp, nums[i]); + } + res = erts_bld_tuplev(hpp, szp, n, ts); + erts_free(ERTS_ALC_T_TMP, (void *) ts); + return res; +} + + +/* implementation of builtin emulations */ + +#if !ERTS_AT_LEAST_GCC_VSN__(3, 4, 0) +/* Count leading zeros emulation */ +Uint32 hashmap_clz(Uint32 x) { + Uint32 y; + int n = 32; + y = x >>16; if (y != 0) {n = n -16; x = y;} + y = x >> 8; if (y != 0) {n = n - 8; x = y;} + y = x >> 4; if (y != 0) {n = n - 4; x = y;} + y = x >> 2; if (y != 0) {n = n - 2; x = y;} + y = x >> 1; if (y != 0) return n - 2; + return n - x; +} + +const Uint32 SK5 = 0x55555555, SK3 = 0x33333333; +const Uint32 SKF0 = 0xF0F0F0F, SKFF = 0xFF00FF; + +/* CTPOP emulation */ +Uint32 hashmap_bitcount(Uint32 x) { + x -= ((x >> 1 ) & SK5); + x = (x & SK3 ) + ((x >> 2 ) & SK3 ); + x = (x & SKF0) + ((x >> 4 ) & SKF0); + x += x >> 8; + return (x + (x >> 16)) & 0x3F; +} +#endif diff --git a/erts/emulator/beam/erl_map.h b/erts/emulator/beam/erl_map.h index 2e02ca4677..1333a734a8 100644 --- a/erts/emulator/beam/erl_map.h +++ b/erts/emulator/beam/erl_map.h @@ -22,13 +22,23 @@ #define __ERL_MAP_H__ #include "sys.h" + +/* instrinsic wrappers */ +#if ERTS_AT_LEAST_GCC_VSN__(3, 4, 0) +#define hashmap_clz(x) ((Uint32) __builtin_clz((unsigned int)(x))) +#define hashmap_bitcount(x) ((Uint32) __builtin_popcount((unsigned int) (x))) +#else +Uint32 hashmap_clz(Uint32 x); +Uint32 hashmap_bitcount(Uint32 x); +#endif + /* MAP */ -typedef struct map_s { +typedef struct flatmap_s { Eterm thing_word; Uint size; Eterm keys; /* tuple */ -} map_t; +} flatmap_t; /* map node * * ----------- @@ -42,39 +52,152 @@ typedef struct map_s { * ----------- */ +/* the head-node is a bitmap or array with an untagged size */ + + +#define hashmap_size(x) (((hashmap_head_t*) hashmap_val(x))->size) +#define hashmap_size_rel(RTERM, BASE) hashmap_size(rterm2wterm(RTERM, BASE)) +#define hashmap_make_hash(Key) make_internal_hash(Key) + +#define hashmap_restore_hash(Heap,Lvl,Key) \ + (((Lvl) < 8) ? hashmap_make_hash(Key) >> (4*(Lvl)) : hashmap_make_hash(CONS(Heap, make_small((Lvl)>>3), (Key))) >> (4*((Lvl) & 7))) +#define hashmap_shift_hash(Heap,Hx,Lvl,Key) \ + (((++(Lvl)) & 7) ? (Hx) >> 4 : hashmap_make_hash(CONS(Heap, make_small((Lvl)>>3), Key))) /* erl_term.h stuff */ -#define make_map(x) make_boxed((Eterm*)(x)) -#define make_map_rel(x, BASE) make_boxed_rel((Eterm*)(x),(BASE)) -#define is_map(x) (is_boxed((x)) && is_map_header(*boxed_val((x)))) -#define is_map_rel(RTERM,BASE) is_map(rterm2wterm(RTERM,BASE)) -#define is_not_map(x) (!is_map((x))) -#define is_map_header(x) (((x) & (_TAG_HEADER_MASK)) == _TAG_HEADER_MAP) -#define header_is_map(x) ((((x) & (_HEADER_SUBTAG_MASK)) == MAP_SUBTAG)) -#define map_val(x) (_unchecked_boxed_val((x))) -#define map_val_rel(RTERM, BASE) map_val(rterm2wterm(RTERM, BASE)) - -#define map_get_values(x) (((Eterm *)(x)) + 3) -#define map_get_keys(x) (((Eterm *)tuple_val(((map_t *)(x))->keys)) + 1) -#define map_get_size(x) (((map_t*)(x))->size) +#define make_flatmap(x) make_boxed((Eterm*)(x)) +#define make_flatmap_rel(x, BASE) make_boxed_rel((Eterm*)(x),(BASE)) +#define is_flatmap(x) (is_boxed((x)) && is_flatmap_header(*boxed_val((x)))) +#define is_flatmap_rel(RTERM,BASE) is_flatmap(rterm2wterm(RTERM,BASE)) +#define is_not_flatmap(x) (!is_flatmap((x))) +#define is_flatmap_header(x) (((x) & (_TAG_HEADER_MASK)) == _TAG_HEADER_MAP) +#define header_is_flatmap(x) ((((x) & (_HEADER_SUBTAG_MASK)) == MAP_SUBTAG)) +#define flatmap_val(x) (_unchecked_boxed_val((x))) +#define flatmap_val_rel(RTERM, BASE) flatmap_val(rterm2wterm(RTERM, BASE)) + +#define flatmap_get_values(x) (((Eterm *)(x)) + 3) +#define flatmap_get_keys(x) (((Eterm *)tuple_val(((flatmap_t *)(x))->keys)) + 1) +#define flatmap_get_size(x) (((flatmap_t*)(x))->size) +#ifdef DEBUG +#define MAP_SMALL_MAP_LIMIT (3) +#else +#define MAP_SMALL_MAP_LIMIT (32) +#endif #define MAP_HEADER _make_header(1,_TAG_HEADER_MAP) -#define MAP_HEADER_SIZE (sizeof(map_t) / sizeof(Eterm)) +#define MAP_HEADER_SIZE (sizeof(flatmap_t) / sizeof(Eterm)) -Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map); -int erts_maps_update(Process *p, Eterm key, Eterm value, Eterm map, Eterm *res); -int erts_maps_remove(Process *p, Eterm key, Eterm map, Eterm *res); -int erts_validate_and_sort_map(map_t* map); +struct ErtsWStack_; +struct ErtsEStack_; + +Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map); +int erts_maps_update(Process *p, Eterm key, Eterm value, Eterm map, Eterm *res); +int erts_maps_remove(Process *p, Eterm key, Eterm map, Eterm *res); + +Eterm erts_hashmap_insert(Process *p, Uint32 hx, Eterm key, Eterm value, + Eterm node, int is_update); +int erts_hashmap_insert_down(Uint32 hx, Eterm key, Eterm node, Uint *sz, + Uint *upsz, struct ErtsEStack_ *sp, int is_update); +Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value, + Uint *upsz, struct ErtsEStack_ *sp); + +int erts_validate_and_sort_flatmap(flatmap_t* map); +Uint hashmap_over_estimated_heap_size(Uint n); +void hashmap_iterator_init(struct ErtsWStack_* s, Eterm node, int reverse); +Eterm* hashmap_iterator_next(struct ErtsWStack_* s); +Eterm* hashmap_iterator_prev(struct ErtsWStack_* s); +int hashmap_key_hash_cmp(Eterm* ap, Eterm* bp); +Eterm erts_hashmap_from_array(ErtsHeapFactory*, Eterm *leafs, Uint n, int reject_dupkeys); + +#define erts_hashmap_from_ks_and_vs(P, KS, VS, N) \ + erts_hashmap_from_ks_and_vs_extra((P), (KS), (VS), (N), THE_NON_VALUE, THE_NON_VALUE); + +Eterm erts_hashmap_from_ks_and_vs_extra(Process *p, Eterm *ks, Eterm *vs, Uint n, + Eterm k, Eterm v); -#if HALFWORD_HEAP const Eterm * +#if HALFWORD_HEAP erts_maps_get_rel(Eterm key, Eterm map, Eterm *map_base); # define erts_maps_get(A, B) erts_maps_get_rel(A, B, NULL) #else -const Eterm * erts_maps_get(Eterm key, Eterm map); # define erts_maps_get_rel(A, B, B_BASE) erts_maps_get(A, B) #endif +const Eterm * +#if HALFWORD_HEAP +erts_hashmap_get_rel(Uint32 hx, Eterm key, Eterm node, Eterm *map_base); +# define erts_hashmap_get(Hx, K, M) erts_hashmap_get_rel(Hx, K, M, NULL) +#else +erts_hashmap_get(Uint32 hx, Eterm key, Eterm map); +# define erts_hashmap_get_rel(Hx, K, M, M_BASE) erts_hashmap_get(Hx, K, M) +#endif + +/* hamt nodes v2.0 + * + * node :: leaf | array | bitmap + * head + */ +typedef struct hashmap_head_s { + Eterm thing_word; + Uint size; + Eterm items[1]; +} hashmap_head_t; + +/* thing_word tagscheme + * Need two bits for map subtags + * + * Original HEADER representation: + * + * aaaaaaaaaaaaaaaa aaaaaaaaaatttt00 arity:26, tag:4 + * + * For maps we have: + * + * vvvvvvvvvvvvvvvv aaaaaaaamm111100 val:16, arity:8, mtype:2 + * + * unsure about trailing zeros + * + * map-tag: + * 00 - flat map tag (non-hamt) -> val:16 = #items + * 01 - map-node bitmap tag -> val:16 = bitmap + * 10 - map-head (array-node) -> val:16 = 0xffff + * 11 - map-head (bitmap-node) -> val:16 = bitmap + */ + +/* erl_map.h stuff */ + +#define is_hashmap_header_head(x) ((MAP_HEADER_TYPE(x) & (0x2))) + +#define MAKE_MAP_HEADER(Type,Arity,Val) \ + (_make_header(((((Uint16)(Val)) << MAP_HEADER_ARITY_SZ) | (Arity)) << MAP_HEADER_TAG_SZ | (Type) , _TAG_HEADER_HASHMAP)) + +#define MAP_HEADER_HAMT_HEAD_ARRAY \ + MAKE_MAP_HEADER(MAP_HEADER_TAG_HAMT_HEAD_ARRAY,0x1,0xffff) + +#define MAP_HEADER_HAMT_HEAD_BITMAP(Bmp) \ + MAKE_MAP_HEADER(MAP_HEADER_TAG_HAMT_HEAD_BITMAP,0x1,Bmp) + +#define MAP_HEADER_HAMT_NODE_ARRAY \ + make_arityval(16) + +#define MAP_HEADER_HAMT_NODE_BITMAP(Bmp) \ + MAKE_MAP_HEADER(MAP_HEADER_TAG_HAMT_NODE_BITMAP,0x0,Bmp) + +#define HAMT_HEAD_EMPTY_SZ (2) +#define HAMT_NODE_ARRAY_SZ (17) +#define HAMT_HEAD_ARRAY_SZ (18) +#define HAMT_NODE_BITMAP_SZ(n) (1 + n) +#define HAMT_HEAD_BITMAP_SZ(n) (2 + n) + +#define _HEADER_MAP_SUBTAG_MASK (0xfc) /* 2 bits maps tag + 4 bits subtag + 2 ignore bits */ +/* SUBTAG_NODE_ARRAY is in fact a tuple with 16 elements */ +#define HAMT_SUBTAG_NODE_ARRAY (((16 << _HEADER_ARITY_OFFS) | ARITYVAL_SUBTAG) & _HEADER_MAP_SUBTAG_MASK) +#define HAMT_SUBTAG_NODE_BITMAP ((MAP_HEADER_TAG_HAMT_NODE_BITMAP << _HEADER_ARITY_OFFS) | HASHMAP_SUBTAG) +#define HAMT_SUBTAG_HEAD_ARRAY ((MAP_HEADER_TAG_HAMT_HEAD_ARRAY << _HEADER_ARITY_OFFS) | HASHMAP_SUBTAG) +#define HAMT_SUBTAG_HEAD_BITMAP ((MAP_HEADER_TAG_HAMT_HEAD_BITMAP << _HEADER_ARITY_OFFS) | HASHMAP_SUBTAG) + +#define hashmap_index(hash) (((Uint32)hash) & 0xf) + + #endif diff --git a/erts/emulator/beam/erl_message.c b/erts/emulator/beam/erl_message.c index 8870fac7d9..43a03c793e 100644 --- a/erts/emulator/beam/erl_message.c +++ b/erts/emulator/beam/erl_message.c @@ -1146,3 +1146,15 @@ erts_deliver_exit_message(Eterm from, Process *to, ErtsProcLocks *to_locksp, } } +Eterm* erts_produce_heap(ErtsHeapFactory* factory, Uint need, Uint xtra) +{ + Eterm* res; + if (factory->p) { + res = HAllocX(factory->p, need, xtra); + } else { + res = factory->hp; + factory->hp += need; + } + return res; +} + diff --git a/erts/emulator/beam/erl_message.h b/erts/emulator/beam/erl_message.h index 0f3bb8d281..6b8c3cebc7 100644 --- a/erts/emulator/beam/erl_message.h +++ b/erts/emulator/beam/erl_message.h @@ -68,6 +68,21 @@ struct erl_heap_fragment { Eterm mem[1]; /* Data */ }; +typedef struct { + Process* p; + Eterm* hp; +} ErtsHeapFactory; + +Eterm* erts_produce_heap(ErtsHeapFactory*, Uint need, Uint xtra); +#ifdef CHECK_FOR_HOLES +# define ERTS_FACTORY_HOLE_CHECK(f) do { \ + if ((f)->p) erts_check_for_holes((f)->p); \ + } while (0) +#else +# define ERTS_FACTORY_HOLE_CHECK(p) +#endif + + typedef struct erl_mesg { struct erl_mesg* next; /* Next message */ union { diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index 198acfd128..e28365cb1b 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -1910,12 +1910,16 @@ int enif_is_map(ErlNifEnv* env, ERL_NIF_TERM term) int enif_get_map_size(ErlNifEnv* env, ERL_NIF_TERM term, size_t *size) { - if (is_map(term)) { - map_t *mp; - mp = (map_t*)map_val(term); - *size = map_get_size(mp); + if (is_flatmap(term)) { + flatmap_t *mp; + mp = (flatmap_t*)flatmap_val(term); + *size = flatmap_get_size(mp); return 1; } + else if (is_hashmap(term)) { + *size = hashmap_size(term); + return 1; + } return 0; } @@ -1923,16 +1927,16 @@ ERL_NIF_TERM enif_make_new_map(ErlNifEnv* env) { Eterm* hp = alloc_heap(env,MAP_HEADER_SIZE+1); Eterm tup; - map_t *mp; + flatmap_t *mp; tup = make_tuple(hp); *hp++ = make_arityval(0); - mp = (map_t*)hp; + mp = (flatmap_t*)hp; mp->thing_word = MAP_HEADER; mp->size = 0; mp->keys = tup; - return make_map(mp); + return make_flatmap(mp); } int enif_make_map_put(ErlNifEnv* env, @@ -1941,7 +1945,7 @@ int enif_make_map_put(ErlNifEnv* env, Eterm value, Eterm *map_out) { - if (is_not_map(map_in)) { + if (!is_map(map_in)) { return 0; } flush_env(env); @@ -1956,7 +1960,7 @@ int enif_get_map_value(ErlNifEnv* env, Eterm *value) { const Eterm *ret; - if (is_not_map(map)) { + if (!is_map(map)) { return 0; } ret = erts_maps_get(key, map); @@ -1974,7 +1978,7 @@ int enif_make_map_update(ErlNifEnv* env, Eterm *map_out) { int res; - if (is_not_map(map_in)) { + if (!is_map(map_in)) { return 0; } @@ -1990,7 +1994,7 @@ int enif_make_map_remove(ErlNifEnv* env, Eterm *map_out) { int res; - if (is_not_map(map_in)) { + if (!is_map(map_in)) { return 0; } flush_env(env); @@ -2004,13 +2008,13 @@ int enif_map_iterator_create(ErlNifEnv *env, ErlNifMapIterator *iter, ErlNifMapIteratorEntry entry) { - if (is_map(map)) { - map_t *mp = (map_t*)map_val(map); + if (is_flatmap(map)) { + flatmap_t *mp = (flatmap_t*)flatmap_val(map); size_t offset; switch (entry) { case ERL_NIF_MAP_ITERATOR_HEAD: offset = 0; break; - case ERL_NIF_MAP_ITERATOR_TAIL: offset = map_get_size(mp) - 1; break; + case ERL_NIF_MAP_ITERATOR_TAIL: offset = flatmap_get_size(mp) - 1; break; default: goto error; } @@ -2019,14 +2023,37 @@ int enif_map_iterator_create(ErlNifEnv *env, */ iter->map = map; - iter->ks = ((Eterm *)map_get_keys(mp)) + offset; - iter->vs = ((Eterm *)map_get_values(mp)) + offset; - iter->t_limit = map_get_size(mp) + 1; + iter->u.flat.ks = ((Eterm *)flatmap_get_keys(mp)) + offset; + iter->u.flat.vs = ((Eterm *)flatmap_get_values(mp)) + offset; + iter->size = flatmap_get_size(mp); iter->idx = offset + 1; return 1; } - + else if (is_hashmap(map)) { + iter->map = map; + iter->size = hashmap_size(map); + iter->u.hash.wstack = erts_alloc(ERTS_ALC_T_NIF, sizeof(ErtsDynamicWStack)); + WSTACK_INIT(iter->u.hash.wstack, ERTS_ALC_T_NIF); + + switch (entry) { + case ERL_NIF_MAP_ITERATOR_HEAD: + iter->idx = 1; + hashmap_iterator_init(&iter->u.hash.wstack->ws, map, 0); + iter->u.hash.kv = hashmap_iterator_next(&iter->u.hash.wstack->ws); + break; + case ERL_NIF_MAP_ITERATOR_TAIL: + iter->idx = hashmap_size(map); + hashmap_iterator_init(&iter->u.hash.wstack->ws, map, 1); + iter->u.hash.kv = hashmap_iterator_prev(&iter->u.hash.wstack->ws); + break; + default: + goto error; + } + ASSERT(!!iter->u.hash.kv == (iter->idx >= 1 && + iter->idx <= iter->size)); + return 1; + } error: #ifdef DEBUG iter->map = THE_NON_VALUE; @@ -2036,48 +2063,97 @@ error: void enif_map_iterator_destroy(ErlNifEnv *env, ErlNifMapIterator *iter) { - /* not used */ + if (is_hashmap(iter->map)) { + WSTACK_DESTROY(iter->u.hash.wstack->ws); + erts_free(ERTS_ALC_T_NIF, iter->u.hash.wstack); + } + else + ASSERT(is_flatmap(iter->map)); + #ifdef DEBUG iter->map = THE_NON_VALUE; #endif - } int enif_map_iterator_is_tail(ErlNifEnv *env, ErlNifMapIterator *iter) { - ASSERT(iter && is_map(iter->map)); - ASSERT(iter->idx >= 0 && (iter->idx <= map_get_size(map_val(iter->map)) + 1)); - return (iter->t_limit == 1 || iter->idx == iter->t_limit); + ASSERT(iter); + if (is_flatmap(iter->map)) { + ASSERT(iter->idx >= 0); + ASSERT(iter->idx <= flatmap_get_size(flatmap_val(iter->map)) + 1); + return (iter->size == 0 || iter->idx > iter->size); + } + else { + ASSERT(is_hashmap(iter->map)); + return iter->idx > iter->size; + } } int enif_map_iterator_is_head(ErlNifEnv *env, ErlNifMapIterator *iter) { - ASSERT(iter && is_map(iter->map)); - ASSERT(iter->idx >= 0 && (iter->idx <= map_get_size(map_val(iter->map)) + 1)); - return (iter->t_limit == 1 || iter->idx == 0); + ASSERT(iter); + if (is_flatmap(iter->map)) { + ASSERT(iter->idx >= 0); + ASSERT(iter->idx <= flatmap_get_size(flatmap_val(iter->map)) + 1); + return (iter->size == 0 || iter->idx == 0); + } + else { + ASSERT(is_hashmap(iter->map)); + return iter->idx == 0; + } } int enif_map_iterator_next(ErlNifEnv *env, ErlNifMapIterator *iter) { - ASSERT(iter && is_map(iter->map)); - if (iter->idx < iter->t_limit) { - iter->idx++; - iter->ks++; - iter->vs++; + ASSERT(iter); + if (is_flatmap(iter->map)) { + if (iter->idx <= iter->size) { + iter->idx++; + iter->u.flat.ks++; + iter->u.flat.vs++; + } + return (iter->idx <= iter->size); + } + else { + ASSERT(is_hashmap(iter->map)); + + if (iter->idx <= hashmap_size(iter->map)) { + if (iter->idx < 1) { + hashmap_iterator_init(&iter->u.hash.wstack->ws, iter->map, 0); + } + iter->u.hash.kv = hashmap_iterator_next(&iter->u.hash.wstack->ws); + iter->idx++; + ASSERT(!!iter->u.hash.kv == (iter->idx <= iter->size)); + } + return iter->idx <= iter->size; } - return (iter->idx != iter->t_limit); } int enif_map_iterator_prev(ErlNifEnv *env, ErlNifMapIterator *iter) { - ASSERT(iter && is_map(iter->map)); - if (iter->idx > 0) { - iter->idx--; - iter->ks--; - iter->vs--; + ASSERT(iter); + if (is_flatmap(iter->map)) { + if (iter->idx > 0) { + iter->idx--; + iter->u.flat.ks--; + iter->u.flat.vs--; + } + return iter->idx > 0; + } + else { + ASSERT(is_hashmap(iter->map)); + + if (iter->idx > 0) { + if (iter->idx > iter->size) { + hashmap_iterator_init(&iter->u.hash.wstack->ws, iter->map, 1); + } + iter->u.hash.kv = hashmap_iterator_prev(&iter->u.hash.wstack->ws); + iter->idx--; + ASSERT(!!iter->u.hash.kv == (iter->idx > 0)); + } + return iter->idx > 0; } - return (iter->idx > 0); } int enif_map_iterator_get_pair(ErlNifEnv *env, @@ -2085,15 +2161,25 @@ int enif_map_iterator_get_pair(ErlNifEnv *env, Eterm *key, Eterm *value) { - ASSERT(iter && is_map(iter->map)); - if (iter->idx > 0 && iter->idx < iter->t_limit) { - ASSERT(iter->ks >= map_get_keys(map_val(iter->map)) && - iter->ks < (map_get_keys(map_val(iter->map)) + map_get_size(map_val(iter->map)))); - ASSERT(iter->vs >= map_get_values(map_val(iter->map)) && - iter->vs < (map_get_values(map_val(iter->map)) + map_get_size(map_val(iter->map)))); - *key = *(iter->ks); - *value = *(iter->vs); - return 1; + ASSERT(iter); + if (is_flatmap(iter->map)) { + if (iter->idx > 0 && iter->idx <= iter->size) { + ASSERT(iter->u.flat.ks >= flatmap_get_keys(flatmap_val(iter->map)) && + iter->u.flat.ks < (flatmap_get_keys(flatmap_val(iter->map)) + flatmap_get_size(flatmap_val(iter->map)))); + ASSERT(iter->u.flat.vs >= flatmap_get_values(flatmap_val(iter->map)) && + iter->u.flat.vs < (flatmap_get_values(flatmap_val(iter->map)) + flatmap_get_size(flatmap_val(iter->map)))); + *key = *(iter->u.flat.ks); + *value = *(iter->u.flat.vs); + return 1; + } + } + else { + ASSERT(is_hashmap(iter->map)); + if (iter->idx > 0 && iter->idx <= iter->size) { + *key = CAR(iter->u.hash.kv); + *value = CDR(iter->u.hash.kv); + return 1; + } } return 0; } diff --git a/erts/emulator/beam/erl_nif.h b/erts/emulator/beam/erl_nif.h index 849024453c..9b2b90c82d 100644 --- a/erts/emulator/beam/erl_nif.h +++ b/erts/emulator/beam/erl_nif.h @@ -201,10 +201,18 @@ typedef enum typedef struct /* All fields all internal and may change */ { ERL_NIF_TERM map; - ERL_NIF_UINT t_limit; + ERL_NIF_UINT size; ERL_NIF_UINT idx; - ERL_NIF_TERM *ks; - ERL_NIF_TERM *vs; + union { + struct { + ERL_NIF_TERM *ks; + ERL_NIF_TERM *vs; + }flat; + struct { + struct ErtsDynamicWStack_* wstack; + ERL_NIF_TERM* kv; + }hash; + }u; void* __spare__[2]; /* for future additions to be ABI compatible (same struct size) */ } ErlNifMapIterator; diff --git a/erts/emulator/beam/erl_printf_term.c b/erts/emulator/beam/erl_printf_term.c index c982dc2080..ac5b139f8d 100644 --- a/erts/emulator/beam/erl_printf_term.c +++ b/erts/emulator/beam/erl_printf_term.c @@ -247,6 +247,17 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) #define PRT_PATCH_FUN_SIZE ((Eterm) 7) #define PRT_LAST_ARRAY_ELEMENT ((Eterm) 8) /* Note! Must be last... */ +#if 0 +static char *format_binary(Uint16 x, char *b) { + int z; + b[16] = '\0'; + for (z = 0; z < 16; z++) { + b[15-z] = ((x>>z) & 0x1) ? '1' : '0'; + } + return b; +} +#endif + static int print_term(fmtfn_t fn, void* arg, Eterm obj, long *dcount, Eterm* obj_base) /* ignored if !HALFWORD_HEAP */ @@ -557,10 +568,10 @@ print_term(fmtfn_t fn, void* arg, Eterm obj, long *dcount, { Uint n; Eterm *ks, *vs; - map_t *mp = (map_t *)map_val(wobj); - n = map_get_size(mp); - ks = map_get_keys(mp); - vs = map_get_values(mp); + flatmap_t *mp = (flatmap_t *)flatmap_val(wobj); + n = flatmap_get_size(mp); + ks = flatmap_get_keys(mp); + vs = flatmap_get_values(mp); PRINT_CHAR(res, fn, arg, '#'); PRINT_CHAR(res, fn, arg, '{'); @@ -575,6 +586,55 @@ print_term(fmtfn_t fn, void* arg, Eterm obj, long *dcount, } } break; + case HASHMAP_DEF: + { + Uint n,mapval; + Eterm *head; + head = hashmap_val(wobj); + mapval = MAP_HEADER_VAL(*head); + switch (MAP_HEADER_TYPE(*head)) { + case MAP_HEADER_TAG_HAMT_HEAD_ARRAY: + case MAP_HEADER_TAG_HAMT_HEAD_BITMAP: + PRINT_STRING(res, fn, arg, "#<"); + PRINT_UWORD(res, fn, arg, 'x', 0, 1, mapval); + PRINT_STRING(res, fn, arg, ">{"); + WSTACK_PUSH(s,PRT_CLOSE_TUPLE); + n = hashmap_bitcount(mapval); + ASSERT(n < 17); + head += 2; + if (n > 0) { + n--; + WSTACK_PUSH(s, head[n]); + WSTACK_PUSH(s, PRT_TERM); + while (n--) { + WSTACK_PUSH(s, PRT_COMMA); + WSTACK_PUSH(s, head[n]); + WSTACK_PUSH(s, PRT_TERM); + } + } + break; + case MAP_HEADER_TAG_HAMT_NODE_BITMAP: + n = hashmap_bitcount(mapval); + head++; + PRINT_CHAR(res, fn, arg, '<'); + PRINT_UWORD(res, fn, arg, 'x', 0, 1, mapval); + PRINT_STRING(res, fn, arg, ">{"); + WSTACK_PUSH(s,PRT_CLOSE_TUPLE); + ASSERT(n < 17); + if (n > 0) { + n--; + WSTACK_PUSH(s, head[n]); + WSTACK_PUSH(s, PRT_TERM); + while (n--) { + WSTACK_PUSH(s, PRT_COMMA); + WSTACK_PUSH(s, head[n]); + WSTACK_PUSH(s, PRT_TERM); + } + } + break; + } + } + break; default: PRINT_STRING(res, fn, arg, "<unknown:"); PRINT_POINTER(res, fn, arg, wobj); @@ -584,11 +644,11 @@ print_term(fmtfn_t fn, void* arg, Eterm obj, long *dcount, } L_done: - DESTROY_WSTACK(s); return res; } + int erts_printf_term(fmtfn_t fn, void* arg, ErlPfEterm term, long precision, ErlPfEterm* term_base) diff --git a/erts/emulator/beam/erl_term.c b/erts/emulator/beam/erl_term.c index 28cbe7004f..d6fb88ea61 100644 --- a/erts/emulator/beam/erl_term.c +++ b/erts/emulator/beam/erl_term.c @@ -86,11 +86,13 @@ unsigned tag_val_def(Wterm x) case (_TAG_HEADER_EXTERNAL_PID >> _TAG_PRIMARY_SIZE): return EXTERNAL_PID_DEF; case (_TAG_HEADER_EXTERNAL_PORT >> _TAG_PRIMARY_SIZE): return EXTERNAL_PORT_DEF; case (_TAG_HEADER_EXTERNAL_REF >> _TAG_PRIMARY_SIZE): return EXTERNAL_REF_DEF; + case (_TAG_HEADER_MAP >> _TAG_PRIMARY_SIZE): return MAP_DEF; case (_TAG_HEADER_REFC_BIN >> _TAG_PRIMARY_SIZE): return BINARY_DEF; case (_TAG_HEADER_HEAP_BIN >> _TAG_PRIMARY_SIZE): return BINARY_DEF; case (_TAG_HEADER_SUB_BIN >> _TAG_PRIMARY_SIZE): return BINARY_DEF; - case (_TAG_HEADER_MAP >> _TAG_PRIMARY_SIZE): return MAP_DEF; + case (_TAG_HEADER_HASHMAP >> _TAG_PRIMARY_SIZE): return HASHMAP_DEF; } + break; } case TAG_PRIMARY_IMMED1: { diff --git a/erts/emulator/beam/erl_term.h b/erts/emulator/beam/erl_term.h index 37014ccf94..1625a4ec15 100644 --- a/erts/emulator/beam/erl_term.h +++ b/erts/emulator/beam/erl_term.h @@ -141,6 +141,7 @@ struct erl_node_; /* Declared in erl_node_tables.h */ #define HEAP_BINARY_SUBTAG (0x9 << _TAG_PRIMARY_SIZE) /* BINARY */ #define SUB_BINARY_SUBTAG (0xA << _TAG_PRIMARY_SIZE) /* BINARY */ /* _BINARY_XXX_MASK depends on 0xB being unused */ +#define HASHMAP_SUBTAG (0xB << _TAG_PRIMARY_SIZE) /* HASHMAP */ #define EXTERNAL_PID_SUBTAG (0xC << _TAG_PRIMARY_SIZE) /* EXTERNAL_PID */ #define EXTERNAL_PORT_SUBTAG (0xD << _TAG_PRIMARY_SIZE) /* EXTERNAL_PORT */ #define EXTERNAL_REF_SUBTAG (0xE << _TAG_PRIMARY_SIZE) /* EXTERNAL_REF */ @@ -162,6 +163,7 @@ struct erl_node_; /* Declared in erl_node_tables.h */ #define _TAG_HEADER_EXTERNAL_REF (TAG_PRIMARY_HEADER|EXTERNAL_REF_SUBTAG) #define _TAG_HEADER_BIN_MATCHSTATE (TAG_PRIMARY_HEADER|BIN_MATCHSTATE_SUBTAG) #define _TAG_HEADER_MAP (TAG_PRIMARY_HEADER|MAP_SUBTAG) +#define _TAG_HEADER_HASHMAP (TAG_PRIMARY_HEADER|HASHMAP_SUBTAG) #define _TAG_HEADER_MASK 0x3F @@ -296,9 +298,11 @@ _ET_DECLARE_CHECKED(Uint,atom_val,Eterm) #define atom_val(x) _ET_APPLY(atom_val,(x)) /* header (arityval or thing) access methods */ -#define _make_header(sz,tag) ((Uint)(((sz) << _HEADER_ARITY_OFFS) + (tag))) +#define _make_header(sz,tag) ((Uint)(((Uint)(sz) << _HEADER_ARITY_OFFS) + (tag))) #define is_header(x) (((x) & _TAG_PRIMARY_MASK) == TAG_PRIMARY_HEADER) -#define _unchecked_header_arity(x) ((x) >> _HEADER_ARITY_OFFS) +//#define _unchecked_header_arity(x) ((x) >> _HEADER_ARITY_OFFS) +#define _unchecked_header_arity(x) \ + (is_hashmap_header(x) ? MAP_HEADER_ARITY(x) : ((x) >> _HEADER_ARITY_OFFS)) _ET_DECLARE_CHECKED(Uint,header_arity,Eterm) #define header_arity(x) _ET_APPLY(header_arity,(x)) @@ -361,6 +365,7 @@ _ET_DECLARE_CHECKED(Uint,thing_subtag,Eterm) ((((x) & (_TAG_HEADER_MASK)) == _TAG_HEADER_REFC_BIN) || \ (((x) & (_TAG_HEADER_MASK)) == _TAG_HEADER_HEAP_BIN) || \ (((x) & (_TAG_HEADER_MASK)) == _TAG_HEADER_SUB_BIN)) + #define make_binary(x) make_boxed((Eterm*)(x)) #define is_binary(x) (is_boxed((x)) && is_binary_header(*boxed_val((x)))) #define is_not_binary(x) (!is_binary((x))) @@ -990,6 +995,33 @@ _ET_DECLARE_CHECKED(Uint32*,external_ref_data,Wterm) _ET_DECLARE_CHECKED(struct erl_node_*,external_ref_node,Eterm) #define external_ref_node(x) _ET_APPLY(external_ref_node,(x)) +/* maps */ + +#define MAP_HEADER_TAG_SZ (2) +#define MAP_HEADER_ARITY_SZ (8) +#define MAP_HEADER_VAL_SZ (16) + +#define MAP_HEADER_TAG_FLAT (0x0) +#define MAP_HEADER_TAG_HAMT_NODE_BITMAP (0x1) +#define MAP_HEADER_TAG_HAMT_HEAD_ARRAY (0x2) +#define MAP_HEADER_TAG_HAMT_HEAD_BITMAP (0x3) + +#define MAP_HEADER_TYPE(Hdr) (((Hdr) >> (_HEADER_ARITY_OFFS)) & (0x3)) +#define MAP_HEADER_ARITY(Hdr) (((Hdr) >> (_HEADER_ARITY_OFFS + MAP_HEADER_TAG_SZ)) & (0xff)) +#define MAP_HEADER_VAL(Hdr) (((Hdr) >> (_HEADER_ARITY_OFFS + MAP_HEADER_TAG_SZ + MAP_HEADER_ARITY_SZ)) & (0xffff)) + +#define make_hashmap(x) make_boxed((Eterm*)(x)) +#define make_hashmap_rel make_boxed_rel +#define is_hashmap(x) (is_boxed((x)) && is_hashmap_header(*boxed_val((x)))) +#define is_not_hashmap(x) (!is_hashmap(x)) +#define is_hashmap_rel(RTERM,BASE) is_hashmap(rterm2wterm(RTERM,BASE)) +#define is_hashmap_header(x) (((x) & (_TAG_HEADER_MASK)) == _TAG_HEADER_HASHMAP) +#define hashmap_val(x) _unchecked_boxed_val((x)) +#define hashmap_val_rel(RTERM, BASE) hashmap_val(rterm2wterm(RTERM, BASE)) + +#define is_map(x) (is_flatmap(x) || is_hashmap(x)) +#define is_map_rel(x,BASE) (is_flatmap_rel(x,BASE) || is_hashmap_rel(x,BASE)) + /* number tests */ #define is_integer(x) (is_small(x) || is_big(x)) @@ -1081,20 +1113,23 @@ _ET_DECLARE_CHECKED(Uint,y_reg_index,Uint) #define BINARY_DEF 0x0 #define LIST_DEF 0x1 #define NIL_DEF 0x2 -#define MAP_DEF 0x3 -#define TUPLE_DEF 0x4 -#define PID_DEF 0x5 -#define EXTERNAL_PID_DEF 0x6 -#define PORT_DEF 0x7 -#define EXTERNAL_PORT_DEF 0x8 -#define EXPORT_DEF 0x9 -#define FUN_DEF 0xa -#define REF_DEF 0xb -#define EXTERNAL_REF_DEF 0xc -#define ATOM_DEF 0xd -#define FLOAT_DEF 0xe -#define BIG_DEF 0xf -#define SMALL_DEF 0x10 +#define HASHMAP_DEF 0x3 +#define MAP_DEF 0x4 +#define TUPLE_DEF 0x5 +#define PID_DEF 0x6 +#define EXTERNAL_PID_DEF 0x7 +#define PORT_DEF 0x8 +#define EXTERNAL_PORT_DEF 0x9 +#define EXPORT_DEF 0xa +#define FUN_DEF 0xb +#define REF_DEF 0xc +#define EXTERNAL_REF_DEF 0xd +#define ATOM_DEF 0xe +#define FLOAT_DEF 0xf +#define BIG_DEF 0x10 +#define SMALL_DEF 0x11 + +#define FIRST_VACANT_TAG_DEF 0x12 #if ET_DEBUG extern unsigned tag_val_def_debug(Wterm, const char*, unsigned); diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h index c32f8fd61c..7cb8972e29 100644 --- a/erts/emulator/beam/erl_utils.h +++ b/erts/emulator/beam/erl_utils.h @@ -113,12 +113,14 @@ void erts_silence_warn_unused_result(long unused); int erts_fit_in_bits_int64(Sint64); int erts_fit_in_bits_int32(Sint32); +int erts_fit_in_bits_uint(Uint); int erts_list_length(Eterm); int erts_is_builtin(Eterm, Eterm, int); Uint32 make_broken_hash(Eterm); Uint32 block_hash(byte *, unsigned, Uint32); Uint32 make_hash2(Eterm); Uint32 make_hash(Eterm); +Uint32 make_internal_hash(Eterm); void erts_save_emu_args(int argc, char **argv); Eterm erts_get_emu_args(struct process *c_p); diff --git a/erts/emulator/beam/erl_vm.h b/erts/emulator/beam/erl_vm.h index 6e9216bef3..3a9fb1e07b 100644 --- a/erts/emulator/beam/erl_vm.h +++ b/erts/emulator/beam/erl_vm.h @@ -117,9 +117,9 @@ #if defined(DEBUG) || defined(CHECK_FOR_HOLES) #if HALFWORD_HEAP -# define ERTS_HOLE_MARKER (0xaf5e78ccU) +# define ERTS_HOLE_MARKER (0xdeadbeef) #else -# define ERTS_HOLE_MARKER (((0xaf5e78ccUL << 24) << 8) | 0xaf5e78ccUL) +# define ERTS_HOLE_MARKER (((0xdeadbeef << 24) << 8) | 0xdeadbeef) #endif #endif diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index e5fb2d3ec1..9a5ef56c47 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -1182,7 +1182,8 @@ typedef struct { Eterm* hp_end; int remaining_n; char* remaining_bytes; - Eterm* maps_head; + Eterm* maps_list; + struct dec_term_hamt_placeholder* hamt_list; } B2TDecodeContext; typedef struct { @@ -1508,7 +1509,8 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binar ctx->u.dc.hp_start = HAlloc(p, ctx->heap_size); ctx->u.dc.hp = ctx->u.dc.hp_start; ctx->u.dc.hp_end = ctx->u.dc.hp_start + ctx->heap_size; - ctx->u.dc.maps_head = NULL; + ctx->u.dc.maps_list = NULL; + ctx->u.dc.hamt_list = NULL; ctx->state = B2TDecode; /*fall through*/ case B2TDecode: @@ -2304,7 +2306,8 @@ dec_pid(ErtsDistExternal *edep, Eterm** hpp, byte* ep, ErlOffHeap* off_heap, Ete #define ENC_PATCH_FUN_SIZE ((Eterm) 2) #define ENC_BIN_COPY ((Eterm) 3) #define ENC_MAP_PAIR ((Eterm) 4) -#define ENC_LAST_ARRAY_ELEMENT ((Eterm) 5) +#define ENC_HASHMAP_NODE ((Eterm) 5) +#define ENC_LAST_ARRAY_ELEMENT ((Eterm) 6) static byte* enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags, @@ -2413,6 +2416,13 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, WSTACK_PUSH2(s, ENC_TERM, *vptr); break; } + case ENC_HASHMAP_NODE: + if (is_list(obj)) { /* leaf node [K|V] */ + ptr = list_val(obj); + WSTACK_PUSH2(s, ENC_TERM, CDR(ptr)); + obj = CAR(ptr); + } + break; case ENC_LAST_ARRAY_ELEMENT: /* obj is the tuple */ { @@ -2595,15 +2605,15 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, case MAP_DEF: { - map_t *mp = (map_t*)map_val(obj); - Uint size = map_get_size(mp); + flatmap_t *mp = (flatmap_t*)flatmap_val(obj); + Uint size = flatmap_get_size(mp); *ep++ = MAP_EXT; put_int32(size, ep); ep += 4; if (size > 0) { - Eterm *kptr = map_get_keys(mp); - Eterm *vptr = map_get_values(mp); + Eterm *kptr = flatmap_get_keys(mp); + Eterm *vptr = flatmap_get_values(mp); WSTACK_PUSH4(s, (UWord)kptr, (UWord)vptr, ENC_MAP_PAIR, size); @@ -2611,6 +2621,44 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, } break; + case HASHMAP_DEF: + { + Eterm hdr; + Uint node_sz; + ptr = boxed_val(obj); + hdr = *ptr; + ASSERT(is_header(hdr)); + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + *ep++ = MAP_EXT; + ptr++; + put_int32(*ptr, ep); ep += 4; + /*fall through*/ + case HAMT_SUBTAG_NODE_ARRAY: + node_sz = 16; + break; + case HAMT_SUBTAG_HEAD_BITMAP: + *ep++ = MAP_EXT; + ptr++; + put_int32(*ptr, ep); ep += 4; + /*fall through*/ + case HAMT_SUBTAG_NODE_BITMAP: + node_sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + ASSERT(node_sz < 17); + break; + default: + erl_exit(1, "bad header\r\n"); + } + + ptr++; + WSTACK_RESERVE(s, node_sz*2); + while(node_sz--) { + WSTACK_FAST_PUSH(s, ENC_HASHMAP_NODE); + WSTACK_FAST_PUSH(s, *ptr++); + } + } + break; + case FLOAT_DEF: GET_DOUBLE(obj, f); if (dflags & DFLAG_NEW_FLOATS) { @@ -2892,9 +2940,19 @@ undo_offheap_in_area(ErlOffHeap* off_heap, Eterm* start, Eterm* end) #endif /* DEBUG */ } +struct dec_term_hamt_placeholder +{ + struct dec_term_hamt_placeholder* next; + Eterm* objp; /* write result here */ + Uint size; /* nr of leafs */ + Eterm leafs[1]; +}; + +#define DEC_TERM_HAMT_PLACEHOLDER_SIZE \ + (offsetof(struct dec_term_hamt_placeholder, leafs) / sizeof(Eterm)) /* Decode term from external format into *objp. -** On failure return NULL and (R13B04) *hpp will be unchanged. +** On failure return NULL and *hpp will be unchanged. */ static byte* dec_term(ErtsDistExternal *edep, Eterm** hpp, byte* ep, ErlOffHeap* off_heap, @@ -2904,7 +2962,8 @@ dec_term(ErtsDistExternal *edep, Eterm** hpp, byte* ep, ErlOffHeap* off_heap, int n; ErtsAtomEncoding char_enc; register Eterm* hp; /* Please don't take the address of hp */ - Eterm *maps_head; /* for validation of maps */ + Eterm *maps_list; /* for preprocessing of small maps */ + struct dec_term_hamt_placeholder* hamt_list; /* for preprocessing of big maps */ Eterm* next; SWord reds; @@ -2914,7 +2973,8 @@ dec_term(ErtsDistExternal *edep, Eterm** hpp, byte* ep, ErlOffHeap* off_heap, next = ctx->u.dc.next; ep = ctx->u.dc.ep; hpp = &ctx->u.dc.hp; - maps_head = ctx->u.dc.maps_head; + maps_list = ctx->u.dc.maps_list; + hamt_list = ctx->u.dc.hamt_list; if (ctx->state != B2TDecode) { int n_limit = reds; @@ -2995,7 +3055,8 @@ dec_term(ErtsDistExternal *edep, Eterm** hpp, byte* ep, ErlOffHeap* off_heap, reds = ERTS_SWORD_MAX; next = objp; *next = (Eterm) (UWord) NULL; - maps_head = NULL; + maps_list = NULL; + hamt_list = NULL; } hp = *hpp; @@ -3531,46 +3592,67 @@ dec_term_atom_common: break; case MAP_EXT: { - map_t *mp; Uint32 size,n; Eterm *kptr,*vptr; Eterm keys; size = get_int32(ep); ep += 4; - keys = make_tuple(hp); - *hp++ = make_arityval(size); - hp += size; - kptr = hp - 1; - - mp = (map_t*)hp; - hp += MAP_HEADER_SIZE; - hp += size; - vptr = hp - 1; - - /* kptr, last word for keys - * vptr, last word for values - */ - - /* - * Use thing_word to link through decoded maps. - * The list of maps is for later validation. - */ - - mp->thing_word = (Eterm) COMPRESS_POINTER(maps_head); - maps_head = (Eterm *) mp; - - mp->size = size; - mp->keys = keys; - *objp = make_map(mp); - - for (n = size; n; n--) { - *vptr = (Eterm) COMPRESS_POINTER(next); - *kptr = (Eterm) COMPRESS_POINTER(vptr); - next = kptr; - vptr--; - kptr--; - } + if (size <= MAP_SMALL_MAP_LIMIT) { + flatmap_t *mp; + + keys = make_tuple(hp); + *hp++ = make_arityval(size); + hp += size; + kptr = hp - 1; + + mp = (flatmap_t*)hp; + hp += MAP_HEADER_SIZE; + hp += size; + vptr = hp - 1; + + /* kptr, last word for keys + * vptr, last word for values + */ + + /* + * Use thing_word to link through decoded maps. + * The list of maps is for later validation. + */ + + mp->thing_word = (Eterm) COMPRESS_POINTER(maps_list); + maps_list = (Eterm *) mp; + + mp->size = size; + mp->keys = keys; + *objp = make_flatmap(mp); + + for (n = size; n; n--) { + *vptr = (Eterm) COMPRESS_POINTER(next); + *kptr = (Eterm) COMPRESS_POINTER(vptr); + next = kptr; + vptr--; + kptr--; + } + } + else { /* Make hamt */ + struct dec_term_hamt_placeholder* holder = + (struct dec_term_hamt_placeholder*) hp; + + holder->next = hamt_list; + hamt_list = holder; + holder->objp = objp; + holder->size = size; + + hp += DEC_TERM_HAMT_PLACEHOLDER_SIZE; + + for (n = size; n; n--) { + CDR(hp) = (Eterm) COMPRESS_POINTER(next); + CAR(hp) = (Eterm) COMPRESS_POINTER(&CDR(hp)); + next = &CAR(hp); + hp += 2; + } + } } break; case NEW_FUN_EXT: @@ -3791,7 +3873,7 @@ dec_term_atom_common: ctx->u.dc.ep = ep; ctx->u.dc.next = next; ctx->u.dc.hp = hp; - ctx->u.dc.maps_head = maps_head; + ctx->u.dc.maps_list = maps_list; ctx->reds = 0; return NULL; } @@ -3806,12 +3888,40 @@ dec_term_atom_common: * - done here for when we know it is complete. */ - while (maps_head) { - next = (Eterm *)(EXPAND_POINTER(*maps_head)); - *maps_head = MAP_HEADER; - if (!erts_validate_and_sort_map((map_t*)maps_head)) + while (maps_list) { + next = (Eterm *)(EXPAND_POINTER(*maps_list)); + *maps_list = MAP_HEADER; + if (!erts_validate_and_sort_flatmap((flatmap_t*)maps_list)) goto error; - maps_head = next; + maps_list = next; + } + + /* Iterate through all the hamts and build tree nodes. + */ + if (hamt_list) { + struct dec_term_hamt_placeholder* hamt = hamt_list; + ErtsHeapFactory factory; + + factory.p = NULL; + factory.hp = hp; + /* We assume heap will suffice (see hashmap_over_estimated_heap_size) */ + + do { + *hamt->objp = erts_hashmap_from_array(&factory, + hamt->leafs, + hamt->size, + 1); + if (is_non_value(*hamt->objp)) + goto error; + + hamt_list = hamt->next; + + /* Yes, we waste a couple of heap words per hamt + for the temporary placeholder */ + *(Eterm*)hamt = make_pos_bignum_header(DEC_TERM_HAMT_PLACEHOLDER_SIZE-1); + } while (hamt_list); + + hp = factory.hp; } if (ctx) { @@ -4009,15 +4119,15 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, break; case MAP_DEF: { - map_t *mp = (map_t*)map_val(obj); - Uint size = map_get_size(mp); + flatmap_t *mp = (flatmap_t*)flatmap_val(obj); + Uint size = flatmap_get_size(mp); Uint i; Eterm *ptr; result += 1 + 4; /* tag + 4 bytes size */ /* push values first */ - ptr = map_get_values(mp); + ptr = flatmap_get_values(mp); i = size; while(i--) { if (is_list(*ptr)) { @@ -4031,7 +4141,7 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, ++ptr; } - ptr = map_get_keys(mp); + ptr = flatmap_get_keys(mp); i = size; while(i--) { if (is_list(*ptr)) { @@ -4047,6 +4157,38 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, goto outer_loop; } break; + + case HASHMAP_DEF: + { + Eterm *ptr; + Eterm hdr; + Uint node_sz; + ptr = boxed_val(obj); + hdr = *ptr; + ASSERT(is_header(hdr)); + switch(hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: ptr++; + case HAMT_SUBTAG_NODE_ARRAY: + node_sz = 16; + break; + case HAMT_SUBTAG_HEAD_BITMAP: ptr++; + case HAMT_SUBTAG_NODE_BITMAP: + node_sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + ASSERT(node_sz < 17); + break; + default: + erl_exit(1, "bad header\r\n"); + } + + ptr++; + ESTACK_RESERVE(s, node_sz); + while(node_sz--) { + ESTACK_FAST_PUSH(s, *ptr++); + } + result += 1 + 4; /* tag + 4 bytes size */ + } + + break; case FLOAT_DEF: if (dflags & DFLAG_NEW_FLOATS) { result += 9; @@ -4149,6 +4291,8 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, return 0; } + + static Sint decoded_size(byte *ep, byte* endp, int internal_tags, B2TContext* ctx) { @@ -4342,7 +4486,11 @@ init_done: n = get_int32(ep); ep += 4; ADDTERMS(2*n); - heap_size += 3 + n + 1 + n; + if (n <= MAP_SMALL_MAP_LIMIT) { + heap_size += 3 + n + 1 + n; + } else { + heap_size += hashmap_over_estimated_heap_size(n); + } break; case STRING_EXT: CHKSIZE(2); diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 5330f389e0..42daa2c9ef 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -372,16 +372,17 @@ extern int stackdump_on_exit; * DESTROY_ESTACK(Stack) */ -typedef struct { +typedef struct ErtsEStack_ { Eterm* start; Eterm* sp; Eterm* end; + Eterm* edefault; ErtsAlcType_t alloc_type; }ErtsEStack; #define DEF_ESTACK_SIZE (16) -void erl_grow_estack(ErtsEStack*, Eterm* def_stack); +void erl_grow_estack(ErtsEStack*, Uint need); #define ESTK_CONCAT(a,b) a##b #define ESTK_DEF_STACK(s) ESTK_CONCAT(s,_default_estack) @@ -391,22 +392,23 @@ void erl_grow_estack(ErtsEStack*, Eterm* def_stack); ESTK_DEF_STACK(s), /* start */ \ ESTK_DEF_STACK(s), /* sp */ \ ESTK_DEF_STACK(s) + DEF_ESTACK_SIZE, /* end */ \ + ESTK_DEF_STACK(s), /* default */ \ ERTS_ALC_T_ESTACK /* alloc_type */ \ } #define ESTACK_CHANGE_ALLOCATOR(s,t) \ do { \ - if (s.start != ESTK_DEF_STACK(s)) { \ + if ((s).start != ESTK_DEF_STACK(s)) { \ erl_exit(1, "Internal error - trying to change allocator " \ "type of active estack\n"); \ } \ - s.alloc_type = (t); \ + (s).alloc_type = (t); \ } while (0) #define DESTROY_ESTACK(s) \ do { \ - if (s.start != ESTK_DEF_STACK(s)) { \ - erts_free(s.alloc_type, s.start); \ + if ((s).start != ESTK_DEF_STACK(s)) { \ + erts_free((s).alloc_type, (s).start); \ } \ } while(0) @@ -417,16 +419,17 @@ do { \ */ #define ESTACK_SAVE(s,dst)\ do {\ - if (s.start == ESTK_DEF_STACK(s)) {\ + if ((s).start == ESTK_DEF_STACK(s)) {\ UWord _wsz = ESTACK_COUNT(s);\ - (dst)->start = erts_alloc(s.alloc_type,\ + (dst)->start = erts_alloc((s).alloc_type,\ DEF_ESTACK_SIZE * sizeof(Eterm));\ - memcpy((dst)->start, s.start,_wsz*sizeof(Eterm));\ + memcpy((dst)->start, (s).start,_wsz*sizeof(Eterm));\ (dst)->sp = (dst)->start + _wsz;\ (dst)->end = (dst)->start + DEF_ESTACK_SIZE;\ - (dst)->alloc_type = s.alloc_type;\ + (dst)->edefault = NULL;\ + (dst)->alloc_type = (s).alloc_type;\ } else\ - *(dst) = s;\ + *(dst) = (s);\ } while (0) #define DESTROY_SAVED_ESTACK(estack)\ @@ -445,83 +448,114 @@ do {\ */ #define ESTACK_RESTORE(s, src) \ do { \ - ASSERT(s.start == ESTK_DEF_STACK(s)); \ - s = *(src); /* struct copy */ \ + ASSERT((s).start == ESTK_DEF_STACK(s)); \ + (s) = *(src); /* struct copy */ \ (src)->start = NULL; \ - ASSERT(s.sp >= s.start); \ - ASSERT(s.sp <= s.end); \ + ASSERT((s).sp >= (s).start); \ + ASSERT((s).sp <= (s).end); \ } while (0) -#define ESTACK_IS_STATIC(s) (s.start == ESTK_DEF_STACK(s))) +#define ESTACK_IS_STATIC(s) ((s).start == ESTK_DEF_STACK(s)) -#define ESTACK_PUSH(s, x) \ -do { \ - if (s.sp == s.end) { \ - erl_grow_estack(&s, ESTK_DEF_STACK(s)); \ - } \ - *s.sp++ = (x); \ +#define ESTACK_PUSH(s, x) \ +do { \ + if ((s).sp == (s).end) { \ + erl_grow_estack(&(s), 1); \ + } \ + *(s).sp++ = (x); \ } while(0) #define ESTACK_PUSH2(s, x, y) \ do { \ - if (s.sp > s.end - 2) { \ - erl_grow_estack(&s, ESTK_DEF_STACK(s)); \ + if ((s).sp > (s).end - 2) { \ + erl_grow_estack(&(s), 2); \ } \ - *s.sp++ = (x); \ - *s.sp++ = (y); \ + *(s).sp++ = (x); \ + *(s).sp++ = (y); \ } while(0) #define ESTACK_PUSH3(s, x, y, z) \ do { \ - if (s.sp > s.end - 3) { \ - erl_grow_estack(&s, ESTK_DEF_STACK(s)); \ + if ((s).sp > (s).end - 3) { \ + erl_grow_estack(&s, 3); \ } \ - *s.sp++ = (x); \ - *s.sp++ = (y); \ - *s.sp++ = (z); \ + *(s).sp++ = (x); \ + *(s).sp++ = (y); \ + *(s).sp++ = (z); \ } while(0) #define ESTACK_PUSH4(s, E1, E2, E3, E4) \ do { \ - if (s.sp > s.end - 4) { \ - erl_grow_estack(&s, ESTK_DEF_STACK(s)); \ + if ((s).sp > (s).end - 4) { \ + erl_grow_estack(&s, 4); \ } \ - *s.sp++ = (E1); \ - *s.sp++ = (E2); \ - *s.sp++ = (E3); \ - *s.sp++ = (E4); \ + *(s).sp++ = (E1); \ + *(s).sp++ = (E2); \ + *(s).sp++ = (E3); \ + *(s).sp++ = (E4); \ } while(0) -#define ESTACK_COUNT(s) (s.sp - s.start) -#define ESTACK_ISEMPTY(s) (s.sp == s.start) -#define ESTACK_POP(s) (*(--s.sp)) +#define ESTACK_RESERVE(s, push_cnt) \ +do { \ + if ((s).sp > (s).end - (push_cnt)) { \ + erl_grow_estack(&(s), (push_cnt)); \ + } \ +} while(0) + +/* Must be preceded by ESTACK_RESERVE */ +#define ESTACK_FAST_PUSH(s, x) \ +do { \ + ASSERT((s).sp < (s).end); \ + *s.sp++ = (x); \ +} while(0) + +#define ESTACK_COUNT(s) ((s).sp - (s).start) +#define ESTACK_ISEMPTY(s) ((s).sp == (s).start) +#define ESTACK_POP(s) (*(--(s).sp)) /* * WSTACK: same as ESTACK but with UWord instead of Eterm */ -typedef struct { +typedef struct ErtsWStack_ { UWord* wstart; UWord* wsp; UWord* wend; + UWord* wdefault; ErtsAlcType_t alloc_type; }ErtsWStack; #define DEF_WSTACK_SIZE (16) -void erl_grow_wstack(ErtsWStack*, UWord* def_stack); +void erl_grow_wstack(ErtsWStack*, Uint need); #define WSTK_CONCAT(a,b) a##b #define WSTK_DEF_STACK(s) WSTK_CONCAT(s,_default_wstack) -#define DECLARE_WSTACK(s) \ +#define WSTACK_DECLARE(s) \ UWord WSTK_DEF_STACK(s)[DEF_WSTACK_SIZE]; \ ErtsWStack s = { \ WSTK_DEF_STACK(s), /* wstart */ \ WSTK_DEF_STACK(s), /* wsp */ \ WSTK_DEF_STACK(s) + DEF_WSTACK_SIZE, /* wend */ \ + WSTK_DEF_STACK(s), /* wdflt */ \ ERTS_ALC_T_ESTACK /* alloc_type */ \ } +#define DECLARE_WSTACK WSTACK_DECLARE + +typedef struct ErtsDynamicWStack_ { + UWord default_stack[DEF_WSTACK_SIZE]; + ErtsWStack ws; +}ErtsDynamicWStack; + +#define WSTACK_INIT(dwsp, ALC_TYPE) \ +do { \ + (dwsp)->ws.wstart = (dwsp)->default_stack; \ + (dwsp)->ws.wsp = (dwsp)->default_stack; \ + (dwsp)->ws.wend = (dwsp)->default_stack + DEF_WSTACK_SIZE;\ + (dwsp)->ws.wdefault = (dwsp)->default_stack; \ + (dwsp)->ws.alloc_type = ALC_TYPE; \ +} while (0) #define WSTACK_CHANGE_ALLOCATOR(s,t) \ do { \ @@ -532,13 +566,20 @@ do { \ s.alloc_type = (t); \ } while (0) -#define DESTROY_WSTACK(s) \ +#define WSTACK_DESTROY(s) \ do { \ - if (s.wstart != WSTK_DEF_STACK(s)) { \ + if (s.wstart != s.wdefault) { \ erts_free(s.alloc_type, s.wstart); \ } \ } while(0) +#define DESTROY_WSTACK WSTACK_DESTROY +#define WSTACK_DEBUG(s) \ + do { \ + fprintf(stderr, "wstack size = %ld\r\n", s.wsp - s.wstart); \ + fprintf(stderr, "wstack wstart = %p\r\n", s.wstart); \ + fprintf(stderr, "wstack wsp = %p\r\n", s.wsp); \ + } while(0) /* * Do not free the stack after this, it may have pointers into what @@ -553,6 +594,7 @@ do {\ memcpy((dst)->wstart, s.wstart,_wsz*sizeof(UWord));\ (dst)->wsp = (dst)->wstart + _wsz;\ (dst)->wend = (dst)->wstart + DEF_WSTACK_SIZE;\ + (dst)->wdefault = NULL;\ (dst)->alloc_type = s.alloc_type;\ } else\ *(dst) = s;\ @@ -581,12 +623,12 @@ do { \ ASSERT(s.wsp <= s.wend); \ } while (0) -#define WSTACK_IS_STATIC(s) (s.wstart == WSTK_DEF_STACK(s))) +#define WSTACK_IS_STATIC(s) (s.wstart == WSTK_DEF_STACK(s)) #define WSTACK_PUSH(s, x) \ do { \ if (s.wsp == s.wend) { \ - erl_grow_wstack(&s, WSTK_DEF_STACK(s)); \ + erl_grow_wstack(&s, 1); \ } \ *s.wsp++ = (x); \ } while(0) @@ -594,7 +636,7 @@ do { \ #define WSTACK_PUSH2(s, x, y) \ do { \ if (s.wsp > s.wend - 2) { \ - erl_grow_wstack(&s, WSTK_DEF_STACK(s)); \ + erl_grow_wstack(&s, 2); \ } \ *s.wsp++ = (x); \ *s.wsp++ = (y); \ @@ -602,8 +644,8 @@ do { \ #define WSTACK_PUSH3(s, x, y, z) \ do { \ - if (s.wsp > s.wend - 3) { \ - erl_grow_wstack(&s, WSTK_DEF_STACK(s)); \ + if (s.wsp > s.wend - 3) { \ + erl_grow_wstack(&s, 3); \ } \ *s.wsp++ = (x); \ *s.wsp++ = (y); \ @@ -612,8 +654,8 @@ do { \ #define WSTACK_PUSH4(s, A1, A2, A3, A4) \ do { \ - if (s.wsp > s.wend - 4) { \ - erl_grow_wstack(&s, WSTK_DEF_STACK(s)); \ + if (s.wsp > s.wend - 4) { \ + erl_grow_wstack(&s, 4); \ } \ *s.wsp++ = (A1); \ *s.wsp++ = (A2); \ @@ -624,7 +666,7 @@ do { \ #define WSTACK_PUSH5(s, A1, A2, A3, A4, A5) \ do { \ if (s.wsp > s.wend - 5) { \ - erl_grow_wstack(&s, WSTK_DEF_STACK(s)); \ + erl_grow_wstack(&s, 5); \ } \ *s.wsp++ = (A1); \ *s.wsp++ = (A2); \ @@ -636,7 +678,7 @@ do { \ #define WSTACK_PUSH6(s, A1, A2, A3, A4, A5, A6) \ do { \ if (s.wsp > s.wend - 6) { \ - erl_grow_wstack(&s, WSTK_DEF_STACK(s)); \ + erl_grow_wstack(&s, 6); \ } \ *s.wsp++ = (A1); \ *s.wsp++ = (A2); \ @@ -646,9 +688,85 @@ do { \ *s.wsp++ = (A6); \ } while(0) +#define WSTACK_RESERVE(s, push_cnt) \ +do { \ + if (s.wsp > s.wend - (push_cnt)) { \ + erl_grow_wstack(&s, (push_cnt)); \ + } \ +} while(0) + +/* Must be preceded by WSTACK_RESERVE */ +#define WSTACK_FAST_PUSH(s, x) \ +do { \ + ASSERT(s.wsp < s.wend); \ + *s.wsp++ = (x); \ +} while(0) + #define WSTACK_COUNT(s) (s.wsp - s.wstart) #define WSTACK_ISEMPTY(s) (s.wsp == s.wstart) -#define WSTACK_POP(s) (*(--s.wsp)) +#define WSTACK_POP(s) ((ASSERT(s.wsp > s.wstart)),*(--s.wsp)) + +#define WSTACK_ROLLBACK(s, count) (ASSERT(WSTACK_COUNT(s) >= (count)), \ + s.wsp = s.wstart + (count)) + +/* PSTACK - Stack of any type. + * Usage: + * { + * #define PSTACK_TYPE MyType + * PSTACK_DECLARE(s,16); + * MyType *sp = PSTACK_PUSH(s); + * + * sp->x = .... + * sp->y = .... + * sp = PSTACK_PUSH(s); + * ... + * sp = PSTACK_POP(s); + * if (PSTACK_IS_EMPTY(s)) { + * // sp is invalid when stack is empty after pop + * } + * + * PSTACK_DESTROY(s); + * } + */ + + +typedef struct ErtsPStack_ { + byte* pstart; + byte* psp; + byte* pend; + ErtsAlcType_t alloc_type; +}ErtsPStack; + +void erl_grow_pstack(ErtsPStack* s, void* default_pstack, unsigned need_bytes); +#define PSTK_CONCAT(a,b) a##b +#define PSTK_DEF_STACK(s) PSTK_CONCAT(s,_default_pstack) + +#define PSTACK_DECLARE(s, DEF_PSTACK_SIZE) \ +PSTACK_TYPE PSTK_DEF_STACK(s)[DEF_PSTACK_SIZE]; \ +ErtsPStack s = { (byte*)PSTK_DEF_STACK(s), /* pstart */ \ + (byte*)(PSTK_DEF_STACK(s) - 1), /* psp */ \ + (byte*)(PSTK_DEF_STACK(s) + (DEF_PSTACK_SIZE)), /* pend */\ + ERTS_ALC_T_ESTACK /* alloc_type */ \ +} + +#define PSTACK_DESTROY(s) \ +do { \ + if (s.pstart != (byte*)PSTK_DEF_STACK(s)) { \ + erts_free(s.alloc_type, s.pstart); \ + } \ +} while(0) + +#define PSTACK_IS_EMPTY(s) (s.psp < s.pstart) + +#define PSTACK_TOP(s) (ASSERT(!PSTACK_IS_EMPTY(s)), (PSTACK_TYPE*)(s.psp)) + +#define PSTACK_PUSH(s) \ + (s.psp += sizeof(PSTACK_TYPE), \ + ((s.psp == s.pend) ? erl_grow_pstack(&s, PSTK_DEF_STACK(s), \ + sizeof(PSTACK_TYPE)) : (void)0), \ + ((PSTACK_TYPE*) s.psp)) + +#define PSTACK_POP(s) ((PSTACK_TYPE*) (s.psp -= sizeof(PSTACK_TYPE))) /* binary.c */ diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c index 9377237475..fb0edbcb1a 100644 --- a/erts/emulator/beam/io.c +++ b/erts/emulator/beam/io.c @@ -5350,7 +5350,11 @@ driver_deliver_term(Eterm to, ErlDrvTermData* data, int len) case ERL_DRV_MAP: { /* int */ ERTS_DDT_CHK_ENOUGH_ARGS(1); if ((int) ptr[0] < 0) ERTS_DDT_FAIL; - need += MAP_HEADER_SIZE + 1 + 2*ptr[0]; + if (ptr[0] > MAP_SMALL_MAP_LIMIT) { + need += hashmap_over_estimated_heap_size(ptr[0]); + } else { + need += MAP_HEADER_SIZE + 1 + 2*ptr[0]; + } depth -= 2*ptr[0]; if (depth < 0) ERTS_DDT_FAIL; ptr++; @@ -5594,31 +5598,52 @@ driver_deliver_term(Eterm to, ErlDrvTermData* data, int len) case ERL_DRV_MAP: { /* int */ int size = (int)ptr[0]; - Eterm* tp = hp; - Eterm* vp; - map_t *mp; - - *tp = make_arityval(size); - - hp += 1 + size; - mp = (map_t*)hp; - mp->thing_word = MAP_HEADER; - mp->size = size; - mp->keys = make_tuple(tp); - mess = make_map(mp); - - hp += MAP_HEADER_SIZE + size; /* advance "heap" pointer */ - - tp += size; /* point at last key */ - vp = hp - 1; /* point at last value */ - - while(size--) { - *vp-- = ESTACK_POP(stack); - *tp-- = ESTACK_POP(stack); - } - if (!erts_validate_and_sort_map(mp)) - ERTS_DDT_FAIL; - ptr++; + if (size > MAP_SMALL_MAP_LIMIT) { + int ix = 2*size; + ErtsHeapFactory factory; + Eterm* leafs = hp; + + hp += 2*size; + while(ix--) { *--hp = ESTACK_POP(stack); } + + hp += 2*size; + factory.p = NULL; + factory.hp = hp; + /* We assume heap will suffice (see hashmap_over_estimated_heap_size) */ + + mess = erts_hashmap_from_array(&factory, leafs, size, 1); + + if (is_non_value(mess)) + ERTS_DDT_FAIL; + + hp = factory.hp; + } else { + Eterm* tp = hp; + Eterm* vp; + flatmap_t *mp; + + *tp = make_arityval(size); + + hp += 1 + size; + mp = (flatmap_t*)hp; + mp->thing_word = MAP_HEADER; + mp->size = size; + mp->keys = make_tuple(tp); + mess = make_flatmap(mp); + + hp += MAP_HEADER_SIZE + size; /* advance "heap" pointer */ + + tp += size; /* point at last key */ + vp = hp - 1; /* point at last value */ + + while(size--) { + *vp-- = ESTACK_POP(stack); + *tp-- = ESTACK_POP(stack); + } + if (!erts_validate_and_sort_flatmap(mp)) + ERTS_DDT_FAIL; + } + ptr++; break; } diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 828f5b427a..d2a8b9e7f4 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -188,6 +188,16 @@ __decl_noreturn void __noreturn erl_assert_error(const char* expr, const char *f # define ASSERT(e) ((void) 1) #endif +/* ERTS_UNDEF can be used to silence false warnings about + * "variable may be used uninitialized" while keeping the variable + * marked as undefined by valgrind. + */ +#ifdef VALGRIND +# define ERTS_UNDEF(V,I) +#else +# define ERTS_UNDEF(V,I) V = I +#endif + /* * Compile time assert * (the actual compiler error msg can be a bit confusing) diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index b8f58754b3..3549e18538 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -190,12 +190,18 @@ erts_set_hole_marker(Eterm* ptr, Uint sz) * Helper function for the ESTACK macros defined in global.h. */ void -erl_grow_estack(ErtsEStack* s, Eterm* default_estack) +erl_grow_estack(ErtsEStack* s, Uint need) { Uint old_size = (s->end - s->start); - Uint new_size = old_size * 2; + Uint new_size; Uint sp_offs = s->sp - s->start; - if (s->start != default_estack) { + + if (need < old_size) + new_size = 2*old_size; + else + new_size = ((need / old_size) + 2) * old_size; + + if (s->start != s->edefault) { s->start = erts_realloc(s->alloc_type, s->start, new_size*sizeof(Eterm)); } else { @@ -210,12 +216,18 @@ erl_grow_estack(ErtsEStack* s, Eterm* default_estack) * Helper function for the WSTACK macros defined in global.h. */ void -erl_grow_wstack(ErtsWStack* s, UWord* default_wstack) +erl_grow_wstack(ErtsWStack* s, Uint need) { Uint old_size = (s->wend - s->wstart); - Uint new_size = old_size * 2; + Uint new_size; Uint sp_offs = s->wsp - s->wstart; - if (s->wstart != default_wstack) { + + if (need < old_size) + new_size = 2 * old_size; + else + new_size = ((need / old_size) + 2) * old_size; + + if (s->wstart != s->wdefault) { s->wstart = erts_realloc(s->alloc_type, s->wstart, new_size*sizeof(UWord)); } else { @@ -227,6 +239,32 @@ erl_grow_wstack(ErtsWStack* s, UWord* default_wstack) s->wsp = s->wstart + sp_offs; } +/* + * Helper function for the PSTACK macros defined in global.h. + */ +void +erl_grow_pstack(ErtsPStack* s, void* default_pstack, unsigned need_bytes) +{ + Uint old_size = s->pend - s->pstart; + Uint new_size; + Uint sp_offs = s->psp - s->pstart; + + if (need_bytes < old_size) + new_size = 2 * old_size; + else + new_size = ((need_bytes / old_size) + 2) * old_size; + + if (s->pstart != default_pstack) { + s->pstart = erts_realloc(s->alloc_type, s->pstart, new_size); + } else { + byte* new_ptr = erts_alloc(s->alloc_type, new_size); + sys_memcpy(new_ptr, s->pstart, old_size); + s->pstart = new_ptr; + } + s->pend = s->pstart + new_size; + s->psp = s->pstart + sp_offs; +} + /* CTYPE macros */ #define LATIN1 @@ -314,6 +352,17 @@ int erts_fit_in_bits_int32(Sint32 value) return fit_in_bits((Sint64) (Uint32) value, 4); } +int erts_fit_in_bits_uint(Uint value) +{ +#if ERTS_SIZEOF_ETERM == 4 + return fit_in_bits((Sint64) (Uint32) value, 4); +#elif ERTS_SIZEOF_ETERM == 8 + return fit_in_bits(value, 5); +#else +# error "No way, Jose" +#endif +} + int erts_print(int to, void *arg, char *format, ...) { @@ -792,10 +841,10 @@ Uint32 make_hash(Eterm term_arg) unsigned op; /* Must not collide with the real tag_val_def's: */ -#define MAKE_HASH_TUPLE_OP 0x11 -#define MAKE_HASH_TERM_ARRAY_OP 0x12 -#define MAKE_HASH_CDR_PRE_OP 0x13 -#define MAKE_HASH_CDR_POST_OP 0x14 +#define MAKE_HASH_TUPLE_OP (FIRST_VACANT_TAG_DEF) +#define MAKE_HASH_TERM_ARRAY_OP (FIRST_VACANT_TAG_DEF+1) +#define MAKE_HASH_CDR_PRE_OP (FIRST_VACANT_TAG_DEF+2) +#define MAKE_HASH_CDR_POST_OP (FIRST_VACANT_TAG_DEF+3) /* ** Convenience macro for calculating a bytewise hash on an unsigned 32 bit @@ -975,21 +1024,9 @@ tail_recur: break; } case MAP_DEF: + case HASHMAP_DEF: { - map_t *mp = (map_t *)map_val(term); - int size = map_get_size(mp); - Eterm *ks = map_get_keys(mp); - Eterm *vs = map_get_values(mp); - - /* Use a prime with size to remedy some of - * the {} and <<>> hash problems */ - hash = hash*FUNNY_NUMBER13 + FUNNY_NUMBER14 + size; - if (size == 0) - break; - - /* push values first */ - WSTACK_PUSH3(stack, (UWord)vs, (UWord) size, MAKE_HASH_TERM_ARRAY_OP); - WSTACK_PUSH3(stack, (UWord)ks, (UWord) size, MAKE_HASH_TERM_ARRAY_OP); + hash = hash*FUNNY_NUMBER13 + FUNNY_NUMBER14 + make_hash2(term); break; } case TUPLE_DEF: @@ -1095,10 +1132,11 @@ Uint32 make_hash2(Eterm term) { Uint32 hash; - Uint32 hash_xor_keys = 0; - Uint32 hash_xor_values = 0; + Uint32 hash_xor_pairs; DeclareTmpHeapNoproc(tmp_big,2); + ERTS_UNDEF(hash_xor_pairs, 0); + /* (HCONST * {2, ..., 16}) mod 2^32 */ #define HCONST_2 0x3c6ef372UL #define HCONST_3 0xdaa66d2bUL @@ -1115,10 +1153,15 @@ make_hash2(Eterm term) #define HCONST_14 0xa708a81eUL #define HCONST_15 0x454021d7UL #define HCONST_16 0xe3779b90UL +#define HCONST_17 0x81af1549UL +#define HCONST_18 0x1fe68f02UL +#define HCONST_19 0xbe1e08bbUL +#define HCONST_20 0x5c558274UL +#define HCONST_21 0xfa8cfc2dUL #define HASH_MAP_TAIL (_make_header(1,_TAG_HEADER_REF)) -#define HASH_MAP_KEY (_make_header(2,_TAG_HEADER_REF)) -#define HASH_MAP_VAL (_make_header(3,_TAG_HEADER_REF)) +#define HASH_MAP_PAIR (_make_header(2,_TAG_HEADER_REF)) +#define HASH_CDR (_make_header(3,_TAG_HEADER_REF)) #define UINT32_HASH_2(Expr1, Expr2, AConst) \ do { \ @@ -1141,6 +1184,13 @@ make_hash2(Eterm term) } while(0) #define IS_SSMALL28(x) (((Uint) (((x) >> (28-1)) + 1)) < 2) + +#ifdef ARCH_64 +# define POINTER_HASH(Ptr, AConst) UINT32_HASH_2((Uint32)(UWord)(Ptr), (((UWord)(Ptr)) >> 32), AConst) +#else +# define POINTER_HASH(Ptr, AConst) UINT32_HASH(Ptr, AConst) +#endif + /* Optimization. Simple cases before declaration of estack. */ if (primary_tag(term) == TAG_PRIMARY_IMMED1) { switch (term & _TAG_IMMED1_MASK) { @@ -1195,9 +1245,9 @@ make_hash2(Eterm term) if (c > 0) UINT32_HASH(sh, HCONST_4); if (is_list(term)) { - term = *ptr; - tmp = *++ptr; - ESTACK_PUSH(s, tmp); + tmp = CDR(ptr); + ESTACK_PUSH(s, tmp); + term = CAR(ptr); } } break; @@ -1214,46 +1264,92 @@ make_hash2(Eterm term) UINT32_HASH(arity, HCONST_9); if (arity == 0) /* Empty tuple */ goto hash2_common; - for (i = arity; i >= 1; i--) { - tmp = elem[i]; - ESTACK_PUSH(s, tmp); + for (i = arity; ; i--) { + term = elem[i]; + if (i == 1) + break; + ESTACK_PUSH(s, term); } - goto hash2_common; } break; case MAP_SUBTAG: { - map_t *mp = (map_t *)map_val(term); + flatmap_t *mp = (flatmap_t *)flatmap_val(term); int i; - int size = map_get_size(mp); - Eterm *ks = map_get_keys(mp); - Eterm *vs = map_get_values(mp); + int size = flatmap_get_size(mp); + Eterm *ks = flatmap_get_keys(mp); + Eterm *vs = flatmap_get_values(mp); UINT32_HASH(size, HCONST_16); if (size == 0) { goto hash2_common; } - ESTACK_PUSH4(s, hash_xor_values, hash_xor_keys, hash, HASH_MAP_TAIL); - hash = 0; - hash_xor_keys = 0; - hash_xor_values = 0; - for (i = size - 1; i >= 0; i--) { - tmp = vs[i]; - ESTACK_PUSH2(s, HASH_MAP_VAL, tmp); - } - /* We do not want to expose the tuple representation. - * Do not push the keys as a tuple. + /* We want a portable hash function that is *independent* of + * the order in which keys and values are encountered. + * We therefore calculate context independent hashes for all . + * key-value pairs and then xor them together. */ + ESTACK_PUSH(s, hash_xor_pairs); + ESTACK_PUSH(s, hash); + ESTACK_PUSH(s, HASH_MAP_TAIL); + hash = 0; + hash_xor_pairs = 0; for (i = size - 1; i >= 0; i--) { - tmp = ks[i]; - ESTACK_PUSH2(s, HASH_MAP_KEY, tmp); + ESTACK_PUSH(s, HASH_MAP_PAIR); + ESTACK_PUSH(s, vs[i]); + ESTACK_PUSH(s, ks[i]); } goto hash2_common; } break; + case HASHMAP_SUBTAG: + { + Eterm* ptr = boxed_val(term) + 1; + Uint size; + int i; + switch (hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + case HAMT_SUBTAG_HEAD_BITMAP: + size = *ptr++; + UINT32_HASH(size, HCONST_16); + if (size == 0) + goto hash2_common; + ESTACK_PUSH(s, hash_xor_pairs); + ESTACK_PUSH(s, hash); + ESTACK_PUSH(s, HASH_MAP_TAIL); + hash = 0; + hash_xor_pairs = 0; + } + switch (hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + case HAMT_SUBTAG_NODE_ARRAY: + i = 16; + break; + case HAMT_SUBTAG_HEAD_BITMAP: + case HAMT_SUBTAG_NODE_BITMAP: + i = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + break; + default: + erl_exit(1, "bad header"); + } + while (i) { + if (is_list(*ptr)) { + Eterm* cons = list_val(*ptr); + ESTACK_PUSH(s, HASH_MAP_PAIR); + ESTACK_PUSH(s, CDR(cons)); + ESTACK_PUSH(s, CAR(cons)); + } + else { + ASSERT(is_boxed(*ptr)); + ESTACK_PUSH(s, *ptr); + } + i--; ptr++; + } + goto hash2_common; + } + break; case EXPORT_SUBTAG: { Export* ep = *((Export **) (export_val(term) + 1)); - UINT32_HASH_2 (ep->code[2], atom_tab(atom_val(ep->code[0]))->slot.bucket.hvalue, @@ -1268,7 +1364,6 @@ make_hash2(Eterm term) { ErlFunThing* funp = (ErlFunThing *) fun_val(term); Uint num_free = funp->num_free; - UINT32_HASH_2 (num_free, atom_tab(atom_val(funp->fe->module))->slot.bucket.hvalue, @@ -1349,7 +1444,8 @@ make_hash2(Eterm term) do { Uint t; Uint32 x, y; - t = i < n ? BIG_DIGIT(ptr, i++) : 0; + ASSERT(i < n); + t = BIG_DIGIT(ptr, i++); x = t & 0xffffffff; y = t >> 32; UINT32_HASH_2(x, y, con); @@ -1457,20 +1553,397 @@ make_hash2(Eterm term) switch (term) { case HASH_MAP_TAIL: { hash = (Uint32) ESTACK_POP(s); - UINT32_HASH(hash_xor_keys, HCONST_16); - UINT32_HASH(hash_xor_values, HCONST_16); - hash_xor_keys = (Uint32) ESTACK_POP(s); - hash_xor_values = (Uint32) ESTACK_POP(s); + UINT32_HASH(hash_xor_pairs, HCONST_19); + hash_xor_pairs = (Uint32) ESTACK_POP(s); goto hash2_common; } - case HASH_MAP_KEY: - hash_xor_keys ^= hash; + case HASH_MAP_PAIR: + hash_xor_pairs ^= hash; hash = 0; goto hash2_common; - case HASH_MAP_VAL: - hash_xor_values ^= hash; + default: + break; + } + } + } + } +} + +/* Term hash function for internal use. + * + * Limitation #1: Is not "portable" in any way between different VM instances. + * + * Limitation #2: The hash value is only valid as long as the term exists + * somewhere in the VM. Why? Because external pids, ports and refs are hashed + * by mixing the node *pointer* value. If a node disappears and later reappears + * with a new ErlNode struct, externals from that node will hash different than + * before. + * + * One IMPORTANT property must hold (for hamt). + * EVERY BIT of the term that is significant for equality (see EQ) + * MUST BE USED AS INPUT FOR THE HASH. Two different terms must always have a + * chance of hashing different when salted: hash([Salt|A]) vs hash([Salt|B]). + * + * This is why we can not use cached hash values for atoms for example. + * + */ + +#define CONST_HASH(AConst) \ +do { /* Lightweight mixing of constant (type info) */ \ + hash ^= AConst; \ + hash = (hash << 17) ^ (hash >> (32-17)); \ +} while (0) + +Uint32 +make_internal_hash(Eterm term) +{ + Uint32 hash; + Uint32 hash_xor_pairs; + + ERTS_UNDEF(hash_xor_pairs, 0); + + /* Optimization. Simple cases before declaration of estack. */ + if (primary_tag(term) == TAG_PRIMARY_IMMED1) { + hash = 0; + #if ERTS_SIZEOF_ETERM == 8 + UINT32_HASH_2((Uint32)term, (Uint32)(term >> 32), HCONST); + #elif ERTS_SIZEOF_ETERM == 4 + UINT32_HASH(term, HCONST); + #else + # error "No you don't" + #endif + return hash; + } + { + Eterm tmp; + DECLARE_ESTACK(s); + + UseTmpHeapNoproc(2); + hash = 0; + for (;;) { + switch (primary_tag(term)) { + case TAG_PRIMARY_LIST: + { + int c = 0; + Uint32 sh = 0; + Eterm* ptr = list_val(term); + while (is_byte(*ptr)) { + /* Optimization for strings. */ + sh = (sh << 8) + unsigned_val(*ptr); + if (c == 3) { + UINT32_HASH(sh, HCONST_4); + c = sh = 0; + } else { + c++; + } + term = CDR(ptr); + if (is_not_list(term)) + break; + ptr = list_val(term); + } + if (c > 0) + UINT32_HASH(sh, HCONST_4); + if (is_list(term)) { + tmp = CDR(ptr); + CONST_HASH(HCONST_17); /* Hash CAR in cons cell */ + ESTACK_PUSH(s, tmp); + if (is_not_list(tmp)) { + ESTACK_PUSH(s, HASH_CDR); + } + term = CAR(ptr); + } + } + break; + case TAG_PRIMARY_BOXED: + { + Eterm hdr = *boxed_val(term); + ASSERT(is_header(hdr)); + switch (hdr & _TAG_HEADER_MASK) { + case ARITYVAL_SUBTAG: + { + int i; + int arity = header_arity(hdr); + Eterm* elem = tuple_val(term); + UINT32_HASH(arity, HCONST_9); + if (arity == 0) /* Empty tuple */ + goto pop_next; + for (i = arity; ; i--) { + term = elem[i]; + if (i == 1) + break; + ESTACK_PUSH(s, term); + } + } + break; + case MAP_SUBTAG: + { + flatmap_t *mp = (flatmap_t *)flatmap_val(term); + int i; + int size = flatmap_get_size(mp); + Eterm *ks = flatmap_get_keys(mp); + Eterm *vs = flatmap_get_values(mp); + UINT32_HASH(size, HCONST_16); + if (size == 0) { + goto pop_next; + } + /* We want a hash function that is *independent* of + * the order in which keys and values are encountered. + * We therefore calculate context independent hashes for all . + * key-value pairs and then xor them together. + */ + ESTACK_PUSH(s, hash_xor_pairs); + ESTACK_PUSH(s, hash); + ESTACK_PUSH(s, HASH_MAP_TAIL); + hash = 0; + hash_xor_pairs = 0; + for (i = size - 1; i >= 0; i--) { + ESTACK_PUSH(s, HASH_MAP_PAIR); + ESTACK_PUSH(s, vs[i]); + ESTACK_PUSH(s, ks[i]); + } + goto pop_next; + } + break; + case HASHMAP_SUBTAG: + { + Eterm* ptr = boxed_val(term) + 1; + Uint size; + int i; + switch (hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + case HAMT_SUBTAG_HEAD_BITMAP: + size = *ptr++; + UINT32_HASH(size, HCONST_16); + if (size == 0) + goto pop_next; + ESTACK_PUSH(s, hash_xor_pairs); + ESTACK_PUSH(s, hash); + ESTACK_PUSH(s, HASH_MAP_TAIL); + hash = 0; + hash_xor_pairs = 0; + } + switch (hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + case HAMT_SUBTAG_NODE_ARRAY: + i = 16; + break; + case HAMT_SUBTAG_HEAD_BITMAP: + case HAMT_SUBTAG_NODE_BITMAP: + i = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + break; + default: + erl_exit(1, "bad header"); + } + while (i) { + if (is_list(*ptr)) { + Eterm* cons = list_val(*ptr); + ESTACK_PUSH(s, HASH_MAP_PAIR); + ESTACK_PUSH(s, CDR(cons)); + ESTACK_PUSH(s, CAR(cons)); + } + else { + ASSERT(is_boxed(*ptr)); + ESTACK_PUSH(s, *ptr); + } + i--; ptr++; + } + goto pop_next; + } + break; + case EXPORT_SUBTAG: + { + Export* ep = *((Export **) (export_val(term) + 1)); + /* Assumes Export entries never moves */ + POINTER_HASH(ep, HCONST_14); + goto pop_next; + } + + case FUN_SUBTAG: + { + ErlFunThing* funp = (ErlFunThing *) fun_val(term); + Uint num_free = funp->num_free; + UINT32_HASH_2(num_free, funp->fe->module, HCONST_20); + UINT32_HASH_2(funp->fe->old_index, funp->fe->old_uniq, HCONST_21); + if (num_free == 0) { + goto pop_next; + } else { + Eterm* bptr = funp->env + num_free - 1; + while (num_free-- > 1) { + term = *bptr--; + ESTACK_PUSH(s, term); + } + term = *bptr; + } + } + break; + case REFC_BINARY_SUBTAG: + case HEAP_BINARY_SUBTAG: + case SUB_BINARY_SUBTAG: + { + byte* bptr; + unsigned sz = binary_size(term); + Uint32 con = HCONST_13 + hash; + Uint bitoffs; + Uint bitsize; + + ERTS_GET_BINARY_BYTES(term, bptr, bitoffs, bitsize); + if (sz == 0 && bitsize == 0) { + hash = con; + } else { + if (bitoffs == 0) { + hash = block_hash(bptr, sz, con); + if (bitsize > 0) { + UINT32_HASH_2(bitsize, (bptr[sz] >> (8 - bitsize)), + HCONST_15); + } + } else { + byte* buf = (byte *) erts_alloc(ERTS_ALC_T_TMP, + sz + (bitsize != 0)); + erts_copy_bits(bptr, bitoffs, 1, buf, 0, 1, sz*8+bitsize); + hash = block_hash(buf, sz, con); + if (bitsize > 0) { + UINT32_HASH_2(bitsize, (buf[sz] >> (8 - bitsize)), + HCONST_15); + } + erts_free(ERTS_ALC_T_TMP, (void *) buf); + } + } + goto pop_next; + } + break; + case POS_BIG_SUBTAG: + case NEG_BIG_SUBTAG: + { + Eterm* ptr = big_val(term); + Uint i = 0; + Uint n = BIG_SIZE(ptr); + Uint32 con = BIG_SIGN(ptr) ? HCONST_10 : HCONST_11; +#if D_EXP == 16 + do { + Uint32 x, y; + x = i < n ? BIG_DIGIT(ptr, i++) : 0; + x += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16; + y = i < n ? BIG_DIGIT(ptr, i++) : 0; + y += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16; + UINT32_HASH_2(x, y, con); + } while (i < n); +#elif D_EXP == 32 + do { + Uint32 x, y; + x = i < n ? BIG_DIGIT(ptr, i++) : 0; + y = i < n ? BIG_DIGIT(ptr, i++) : 0; + UINT32_HASH_2(x, y, con); + } while (i < n); +#elif D_EXP == 64 + do { + Uint t; + Uint32 x, y; + ASSERT(i < n); + t = BIG_DIGIT(ptr, i++); + x = t & 0xffffffff; + y = t >> 32; + UINT32_HASH_2(x, y, con); + } while (i < n); +#else +#error "unsupported D_EXP size" +#endif + goto pop_next; + } + break; + case REF_SUBTAG: + UINT32_HASH(internal_ref_numbers(term)[0], HCONST_7); + ASSERT(internal_ref_no_of_numbers(term) == 3); + UINT32_HASH_2(internal_ref_numbers(term)[1], + internal_ref_numbers(term)[2], HCONST_8); + goto pop_next; + + case EXTERNAL_REF_SUBTAG: + { + ExternalThing* thing = external_thing_ptr(term); + + ASSERT(external_thing_ref_no_of_numbers(thing) == 3); + /* See limitation #2 */ + #ifdef ARCH_64 + POINTER_HASH(thing->node, HCONST_7); + UINT32_HASH(external_thing_ref_numbers(thing)[0], HCONST_7); + #else + UINT32_HASH_2(thing->node, + external_thing_ref_numbers(thing)[0], HCONST_7); + #endif + UINT32_HASH_2(external_thing_ref_numbers(thing)[1], + external_thing_ref_numbers(thing)[2], HCONST_8); + goto pop_next; + } + case EXTERNAL_PID_SUBTAG: { + ExternalThing* thing = external_thing_ptr(term); + /* See limitation #2 */ + #ifdef ARCH_64 + POINTER_HASH(thing->node, HCONST_5); + UINT32_HASH(thing->data.ui[0], HCONST_5); + #else + UINT32_HASH_2(thing->node, thing->data.ui[0], HCONST_5); + #endif + goto pop_next; + } + case EXTERNAL_PORT_SUBTAG: { + ExternalThing* thing = external_thing_ptr(term); + /* See limitation #2 */ + #ifdef ARCH_64 + POINTER_HASH(thing->node, HCONST_6); + UINT32_HASH(thing->data.ui[0], HCONST_6); + #else + UINT32_HASH_2(thing->node, thing->data.ui[0], HCONST_6); + #endif + goto pop_next; + } + case FLOAT_SUBTAG: + { + FloatDef ff; + GET_DOUBLE(term, ff); + UINT32_HASH_2(ff.fw[0], ff.fw[1], HCONST_12); + goto pop_next; + } + + default: + erl_exit(1, "Invalid tag in make_hash2(0x%X)\n", term); + } + } + break; + case TAG_PRIMARY_IMMED1: + #if ERTS_SIZEOF_ETERM == 8 + UINT32_HASH_2((Uint32)term, (Uint32)(term >> 32), HCONST); + #else + UINT32_HASH(term, HCONST); + #endif + goto pop_next; + + default: + erl_exit(1, "Invalid tag in make_hash2(0x%X)\n", term); + + pop_next: + if (ESTACK_ISEMPTY(s)) { + DESTROY_ESTACK(s); + UnUseTmpHeapNoproc(2); + return hash; + } + + term = ESTACK_POP(s); + + switch (term) { + case HASH_MAP_TAIL: { + hash = (Uint32) ESTACK_POP(s); + UINT32_HASH(hash_xor_pairs, HCONST_19); + hash_xor_pairs = (Uint32) ESTACK_POP(s); + goto pop_next; + } + case HASH_MAP_PAIR: + hash_xor_pairs ^= hash; hash = 0; - goto hash2_common; + goto pop_next; + + case HASH_CDR: + CONST_HASH(HCONST_18); /* Hash CDR i cons cell */ + goto pop_next; default: break; } @@ -1478,9 +1951,10 @@ make_hash2(Eterm term) } } +#undef CONST_HASH #undef HASH_MAP_TAIL -#undef HASH_MAP_KEY -#undef HASH_MAP_VAL +#undef HASH_MAP_PAIR +#undef HASH_CDR #undef UINT32_HASH_2 #undef UINT32_HASH @@ -1697,21 +2171,9 @@ tail_recur: break; case MAP_DEF: + case HASHMAP_DEF: { - map_t *mp = (map_t *)map_val(term); - int size = map_get_size(mp); - Eterm *ks = map_get_keys(mp); - Eterm *vs = map_get_values(mp); - - /* Use a prime with size to remedy some of - * the {} and <<>> hash problems */ - hash = hash*FUNNY_NUMBER13 + FUNNY_NUMBER14 + size; - if (size == 0) - break; - - /* push values first */ - WSTACK_PUSH3(stack, (UWord)vs, (UWord) size, MAKE_HASH_TERM_ARRAY_OP); - WSTACK_PUSH3(stack, (UWord)ks, (UWord) size, MAKE_HASH_TERM_ARRAY_OP); + hash = hash*FUNNY_NUMBER13 + FUNNY_NUMBER14 + make_hash2(term); break; } case TUPLE_DEF: @@ -2120,13 +2582,13 @@ tailrecur_ne: } case MAP_SUBTAG: { - aa = map_val_rel(a, a_base); + aa = flatmap_val_rel(a, a_base); if (!is_boxed(b) || *boxed_val_rel(b,b_base) != *aa) goto not_equal; - bb = map_val_rel(b,b_base); - sz = map_get_size((map_t*)aa); + bb = flatmap_val_rel(b,b_base); + sz = flatmap_get_size((flatmap_t*)aa); - if (sz != map_get_size((map_t*)bb)) goto not_equal; + if (sz != flatmap_get_size((flatmap_t*)bb)) goto not_equal; if (sz == 0) goto pop_next; aa += 2; @@ -2325,6 +2787,32 @@ tailrecur_ne: } break; /* not equal */ } + case HASHMAP_SUBTAG: + { + if (!is_boxed(b) || *boxed_val_rel(b,b_base) != hdr) + goto not_equal; + + aa = hashmap_val_rel(a, a_base) + 1; + bb = hashmap_val_rel(b, b_base) + 1; + switch (hdr & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: + aa++; bb++; + case HAMT_SUBTAG_NODE_ARRAY: + sz = 16; + break; + case HAMT_SUBTAG_HEAD_BITMAP: + aa++; bb++; + case HAMT_SUBTAG_NODE_BITMAP: + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + ASSERT(sz > 0 && sz < 16); + break; + default: + erl_exit(1, "Unknown hashmap subsubtag\n"); + } + goto term_array; + } + default: + ASSERT(!"Unknown boxed subtab in EQ"); } break; } @@ -2448,7 +2936,18 @@ Sint erts_cmp_rel_opt(Eterm a, Eterm* a_base, Eterm b, Eterm* b_base, int exact) Sint erts_cmp(Eterm a, Eterm b, int exact) #endif { - DECLARE_WSTACK(stack); +#define PSTACK_TYPE struct erts_cmp_hashmap_state + struct erts_cmp_hashmap_state { + Sint wstack_rollback; + int was_exact; + Eterm *ap; + Eterm *bp; + Eterm min_key; + Sint cmp_res; /* result so far -1,0,+1 */ + }; + PSTACK_DECLARE(hmap_stack, 1); + WSTACK_DECLARE(stack); + WSTACK_DECLARE(b_stack); /* only used by hashmaps */ Eterm* aa; Eterm* bb; int i; @@ -2464,6 +2963,26 @@ Sint erts_cmp(Eterm a, Eterm b, int exact) Uint32 *anum; Uint32 *bnum; +/* The WSTACK contains naked Eterms and Operations marked with header-tags */ +#define OP_BITS 4 +#define OP_MASK 0xF +#define TERM_ARRAY_OP 0 +#define SWITCH_EXACT_OFF_OP 1 +#define HASHMAP_PHASE1_ARE_KEYS_EQUAL 2 +#define HASHMAP_PHASE1_IS_MIN_KEY 3 +#define HASHMAP_PHASE1_CMP_VALUES 4 +#define HASHMAP_PHASE2_ARE_KEYS_EQUAL 5 +#define HASHMAP_PHASE2_IS_MIN_KEY_A 6 +#define HASHMAP_PHASE2_IS_MIN_KEY_B 7 + + +#define OP_WORD(OP) (((OP) << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER) +#define TERM_ARRAY_OP_WORD(SZ) OP_WORD(((SZ) << OP_BITS) | TERM_ARRAY_OP) + +#define GET_OP(WORD) (ASSERT(is_header(WORD)), ((WORD) >> _TAG_PRIMARY_SIZE) & OP_MASK) +#define GET_OP_ARG(WORD) (ASSERT(is_header(WORD)), ((WORD) >> (OP_BITS + _TAG_PRIMARY_SIZE))) + + #define RETURN_NEQ(cmp) { j=(cmp); ASSERT(j != 0); goto not_equal; } #define ON_CMP_GOTO(cmp) if ((j=(cmp)) == 0) goto pop_next; else goto not_equal @@ -2479,6 +2998,8 @@ Sint erts_cmp(Eterm a, Eterm b, int exact) } while (0) +bodyrecur: + j = 0; tailrecur: if (is_same(a,a_base,b,b_base)) { /* Equal values or pointers. */ goto pop_next; @@ -2606,24 +3127,83 @@ tailrecur_ne: ++bb; goto term_array; case (_TAG_HEADER_MAP >> _TAG_PRIMARY_SIZE) : - if (!is_map_rel(b,b_base)) { + if (!is_flatmap_rel(b,b_base)) { a_tag = MAP_DEF; goto mixed_types; } - aa = (Eterm *)map_val_rel(a,a_base); - bb = (Eterm *)map_val_rel(b,b_base); + aa = (Eterm *)flatmap_val_rel(a,a_base); + bb = (Eterm *)flatmap_val_rel(b,b_base); - i = map_get_size((map_t*)aa); - if (i != map_get_size((map_t*)bb)) { - RETURN_NEQ((int)(i - map_get_size((map_t*)bb))); + i = flatmap_get_size((flatmap_t*)aa); + if (i != flatmap_get_size((flatmap_t*)bb)) { + RETURN_NEQ((int)(i - flatmap_get_size((flatmap_t*)bb))); } if (i == 0) { goto pop_next; } aa += 2; bb += 2; - i += 1; /* increment for tuple-keys */ - goto term_array; + if (exact) { + i += 1; /* increment for tuple-keys */ + goto term_array; + } + else { + /* Value array */ + WSTACK_PUSH3(stack, (UWord)(bb+1), (UWord)(aa+1), TERM_ARRAY_OP_WORD(i)); + /* Switch back from 'exact' key compare */ + WSTACK_PUSH(stack, OP_WORD(SWITCH_EXACT_OFF_OP)); + /* Now do 'exact' compare of key tuples */ + a = *aa; + b = *bb; + exact = 1; + goto bodyrecur; + } + + case (_TAG_HEADER_HASHMAP >> _TAG_PRIMARY_SIZE) : + { + struct erts_cmp_hashmap_state* sp; + if (!is_hashmap_rel(b,b_base)) { + a_tag = HASHMAP_DEF; + goto mixed_types; + } + i = hashmap_size_rel(a,a_base) - hashmap_size_rel(b,b_base); + if (i) { + RETURN_NEQ(i); + } + if (hashmap_size_rel(a,a_base) == 0) { + goto pop_next; + } + + /* Hashmap compare strategy: + Phase 1. While keys are identical + Do synchronous stepping through leafs of both trees in hash + order. Maintain value compare result of minimal key. + + Phase 2. If key diff was found in phase 1 + Ignore values from now on. + Continue iterate trees by always advancing the one + lagging behind hash-wise. Identical keys are skipped. + A minimal key can only be candidate as tie-breaker if we + have passed that hash value in the other tree (which means + the key did not exist in the other tree). + */ + + sp = PSTACK_PUSH(hmap_stack); + hashmap_iterator_init(&stack, a, 0); + hashmap_iterator_init(&b_stack, b, 0); + sp->ap = hashmap_iterator_next(&stack); + sp->bp = hashmap_iterator_next(&b_stack); + sp->cmp_res = 0; + ASSERT(sp->ap && sp->bp); + + a = CAR(sp->ap); + b = CAR(sp->bp); + sp->was_exact = exact; + exact = 1; + WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE1_ARE_KEYS_EQUAL)); + sp->wstack_rollback = WSTACK_COUNT(stack); + goto bodyrecur; + } case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE): if (!is_float_rel(b,b_base)) { a_tag = FLOAT_DEF; @@ -2983,8 +3563,7 @@ term_array: /* arrays in 'aa' and 'bb', length in 'i' */ goto not_equal; } } else { - /* (ab)Use TAG_PRIMARY_HEADER to recognize a term_array */ - WSTACK_PUSH3(stack, i, (UWord)bb, (UWord)aa | TAG_PRIMARY_HEADER); + WSTACK_PUSH3(stack, (UWord)bb, (UWord)aa, TERM_ARRAY_OP_WORD(i)); goto tailrecur_ne; } } @@ -2996,22 +3575,179 @@ term_array: /* arrays in 'aa' and 'bb', length in 'i' */ pop_next: if (!WSTACK_ISEMPTY(stack)) { UWord something = WSTACK_POP(stack); - if (primary_tag((Eterm) something) == TAG_PRIMARY_HEADER) { /* a term_array */ - aa = (Eterm*) something; - bb = (Eterm*) WSTACK_POP(stack); - i = WSTACK_POP(stack); - goto term_array; + struct erts_cmp_hashmap_state* sp; + if (primary_tag((Eterm) something) == TAG_PRIMARY_HEADER) { /* an operation */ + switch (GET_OP(something)) { + case TERM_ARRAY_OP: + i = GET_OP_ARG(something); + aa = (Eterm*)WSTACK_POP(stack); + bb = (Eterm*) WSTACK_POP(stack); + goto term_array; + + case SWITCH_EXACT_OFF_OP: + /* Done with exact compare of map keys, switch back */ + ASSERT(exact); + exact = 0; + goto pop_next; + + case HASHMAP_PHASE1_ARE_KEYS_EQUAL: { + sp = PSTACK_TOP(hmap_stack); + if (j) { + /* Key diff found, enter phase 2 */ + if (hashmap_key_hash_cmp(sp->ap, sp->bp) < 0) { + sp->min_key = CAR(sp->ap); + sp->cmp_res = -1; + sp->ap = hashmap_iterator_next(&stack); + } + else { + sp->min_key = CAR(sp->bp); + sp->cmp_res = 1; + sp->bp = hashmap_iterator_next(&b_stack); + } + exact = 1; /* only exact key compares in phase 2 */ + goto case_HASHMAP_PHASE2_LOOP; + } + + /* No key diff found so far, compare values if min key */ + + if (sp->cmp_res) { + a = CAR(sp->ap); + b = sp->min_key; + exact = 1; + WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE1_IS_MIN_KEY)); + sp->wstack_rollback = WSTACK_COUNT(stack); + goto bodyrecur; + } + /* no min key-value found yet */ + a = CDR(sp->ap); + b = CDR(sp->bp); + exact = sp->was_exact; + WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE1_CMP_VALUES)); + sp->wstack_rollback = WSTACK_COUNT(stack); + goto bodyrecur; + } + case HASHMAP_PHASE1_IS_MIN_KEY: + sp = PSTACK_TOP(hmap_stack); + if (j < 0) { + a = CDR(sp->ap); + b = CDR(sp->bp); + exact = sp->was_exact; + WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE1_CMP_VALUES)); + sp->wstack_rollback = WSTACK_COUNT(stack); + goto bodyrecur; + } + goto case_HASHMAP_PHASE1_LOOP; + + case HASHMAP_PHASE1_CMP_VALUES: + sp = PSTACK_TOP(hmap_stack); + if (j) { + sp->cmp_res = j; + sp->min_key = CAR(sp->ap); + } + case_HASHMAP_PHASE1_LOOP: + sp->ap = hashmap_iterator_next(&stack); + sp->bp = hashmap_iterator_next(&b_stack); + if (!sp->ap) { + /* end of maps with identical keys */ + ASSERT(!sp->bp); + j = sp->cmp_res; + exact = sp->was_exact; + (void) PSTACK_POP(hmap_stack); + ON_CMP_GOTO(j); + } + a = CAR(sp->ap); + b = CAR(sp->bp); + exact = 1; + WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE1_ARE_KEYS_EQUAL)); + sp->wstack_rollback = WSTACK_COUNT(stack); + goto bodyrecur; + + case_HASHMAP_PHASE2_LOOP: + if (sp->ap && sp->bp) { + a = CAR(sp->ap); + b = CAR(sp->bp); + ASSERT(exact); + WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE2_ARE_KEYS_EQUAL)); + sp->wstack_rollback = WSTACK_COUNT(stack); + goto bodyrecur; + } + goto case_HASHMAP_PHASE2_NEXT_STEP; + + case HASHMAP_PHASE2_ARE_KEYS_EQUAL: + sp = PSTACK_TOP(hmap_stack); + if (j == 0) { + /* keys are equal, skip them */ + sp->ap = hashmap_iterator_next(&stack); + sp->bp = hashmap_iterator_next(&b_stack); + goto case_HASHMAP_PHASE2_LOOP; + } + /* fall through */ + case_HASHMAP_PHASE2_NEXT_STEP: + if (sp->ap || sp->bp) { + if (hashmap_key_hash_cmp(sp->ap, sp->bp) < 0) { + ASSERT(sp->ap); + a = CAR(sp->ap); + b = sp->min_key; + ASSERT(exact); + WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE2_IS_MIN_KEY_A)); + } + else { /* hash_cmp > 0 */ + ASSERT(sp->bp); + a = CAR(sp->bp); + b = sp->min_key; + ASSERT(exact); + WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE2_IS_MIN_KEY_B)); + } + sp->wstack_rollback = WSTACK_COUNT(stack); + goto bodyrecur; + } + /* End of both maps */ + j = sp->cmp_res; + exact = sp->was_exact; + (void) PSTACK_POP(hmap_stack); + ON_CMP_GOTO(j); + + case HASHMAP_PHASE2_IS_MIN_KEY_A: + sp = PSTACK_TOP(hmap_stack); + if (j < 0) { + sp->min_key = CAR(sp->ap); + sp->cmp_res = -1; + } + sp->ap = hashmap_iterator_next(&stack); + goto case_HASHMAP_PHASE2_LOOP; + + case HASHMAP_PHASE2_IS_MIN_KEY_B: + sp = PSTACK_TOP(hmap_stack); + if (j < 0) { + sp->min_key = CAR(sp->bp); + sp->cmp_res = 1; + } + sp->bp = hashmap_iterator_next(&b_stack); + goto case_HASHMAP_PHASE2_LOOP; + + default: + ASSERT(!"Invalid cmp op"); + } /* switch */ } a = (Eterm) something; b = (Eterm) WSTACK_POP(stack); goto tailrecur; } - DESTROY_WSTACK(stack); + ASSERT(PSTACK_IS_EMPTY(hmap_stack)); + PSTACK_DESTROY(hmap_stack); + WSTACK_DESTROY(stack); + WSTACK_DESTROY(b_stack); return 0; not_equal: - DESTROY_WSTACK(stack); + if (!PSTACK_IS_EMPTY(hmap_stack)) { + WSTACK_ROLLBACK(stack, PSTACK_TOP(hmap_stack)->wstack_rollback); + goto pop_next; + } + PSTACK_DESTROY(hmap_stack); + WSTACK_DESTROY(stack); + WSTACK_DESTROY(b_stack); return j; #undef CMP_NODES diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl index 888ed8e272..c421886431 100644 --- a/erts/emulator/test/map_SUITE.erl +++ b/erts/emulator/test/map_SUITE.erl @@ -30,7 +30,7 @@ t_list_comprehension/1, t_map_sort_literals/1, t_map_equal/1, - %t_size/1, + t_map_compare/1, t_map_size/1, %% Specific Map BIFs @@ -51,6 +51,10 @@ t_erlang_hash/1, t_map_encode_decode/1, + %% non specific BIF related + t_bif_build_and_check/1, + t_bif_merge_and_check/1, + %% maps module not bifs t_maps_fold/1, t_maps_map/1, @@ -66,6 +70,13 @@ -include_lib("stdlib/include/ms_transform.hrl"). +-define(CHECK(Cond,Term), + case (catch (Cond)) of + true -> true; + _ -> io:format("###### CHECK FAILED ######~nINPUT: ~p~n", [Term]), + exit(Term) + end). + suite() -> []. all() -> [ @@ -75,7 +86,7 @@ all() -> [ t_update_assoc,t_update_exact, t_guard_bifs, t_guard_sequence, t_guard_update, t_guard_receive,t_guard_fun, t_list_comprehension, - t_map_equal, + t_map_equal, t_map_compare, t_map_sort_literals, %% Specific Map BIFs @@ -90,6 +101,10 @@ all() -> [ t_erlang_hash, t_map_encode_decode, t_map_size, + %% non specific BIF related + t_bif_build_and_check, + t_bif_merge_and_check, + %% maps module t_maps_fold, t_maps_map, t_maps_size, t_maps_without, @@ -146,17 +161,6 @@ t_build_and_match_literals(Config) when is_list(Config) -> ok. -%% Tests size(Map). -%% not implemented, perhaps it shouldn't be either - -%t_size(Config) when is_list(Config) -> -% 0 = size(#{}), -% 1 = size(#{a=>1}), -% 1 = size(#{a=>#{a=>1}}), -% 2 = size(#{a=>1, b=>2}), -% 3 = size(#{a=>1, b=>2, b=>"3"}), -% ok. - t_map_size(Config) when is_list(Config) -> 0 = map_size(id(#{})), 1 = map_size(id(#{a=>1})), @@ -172,12 +176,23 @@ t_map_size(Config) when is_list(Config) -> true = map_is_size(M#{ "a" => 2}, 2), false = map_is_size(M#{ "c" => 2}, 2), + Ks = [build_key(fun(K) -> <<1,K:32,1>> end,I)||I<-lists:seq(1,100)], + ok = build_and_check_size(Ks,0,#{}), + %% Error cases. {'EXIT',{badarg,_}} = (catch map_size([])), {'EXIT',{badarg,_}} = (catch map_size(<<1,2,3>>)), {'EXIT',{badarg,_}} = (catch map_size(1)), ok. +build_and_check_size([K|Ks],N,M0) -> + N = map_size(M0), + M1 = M0#{ K => K }, + build_and_check_size(Ks,N + 1,M1); +build_and_check_size([],N,M) -> + N = map_size(M), + ok. + map_is_size(M,N) when map_size(M) =:= N -> true; map_is_size(_,_) -> false. @@ -432,7 +447,7 @@ t_map_sort_literals(Config) when is_list(Config) -> true = #{ c => 1, b => 1, a => 1 } < id(#{ b => 1, c => 1, d => 1}), true = #{ "a" => 1 } < id(#{ <<"a">> => 1}), false = #{ <<"a">> => 1 } < id(#{ "a" => 1}), - false = #{ 1 => 1 } < id(#{ 1.0 => 1}), + true = #{ 1 => 1 } < id(#{ 1.0 => 1}), false = #{ 1.0 => 1 } < id(#{ 1 => 1}), %% value order @@ -440,16 +455,47 @@ t_map_sort_literals(Config) when is_list(Config) -> false = #{ a => 2 } < id(#{ a => 1}), false = #{ a => 2, b => 1 } < id(#{ a => 1, b => 3}), true = #{ a => 1, b => 1 } < id(#{ a => 1, b => 3}), + false = #{ a => 1 } < id(#{ a => 1.0}), + false = #{ a => 1.0 } < id(#{ a => 1}), true = #{ "a" => "hi", b => 134 } == id(#{ b => 134,"a" => "hi"}), + %% large maps + + M = maps:from_list([{I,I}||I <- lists:seq(1,500)]), + + %% size order + true = M#{ a => 1, b => 2} < id(M#{ a => 1, b => 1, c => 1}), + true = M#{ b => 1, a => 1} < id(M#{ c => 1, a => 1, b => 1}), + false = M#{ c => 1, b => 1, a => 1} < id(M#{ c => 1, a => 1}), + + %% key order + true = M#{ a => 1 } < id(M#{ b => 1}), + false = M#{ b => 1 } < id(M#{ a => 1}), + true = M#{ a => 1, b => 1, c => 1 } < id(M#{ b => 1, c => 1, d => 1}), + true = M#{ b => 1, c => 1, d => 1 } > id(M#{ a => 1, b => 1, c => 1}), + true = M#{ c => 1, b => 1, a => 1 } < id(M#{ b => 1, c => 1, d => 1}), + true = M#{ "a" => 1 } < id(M#{ <<"a">> => 1}), + false = M#{ <<"a">> => 1 } < id(#{ "a" => 1}), + true = M#{ 1 => 1 } < id(maps:remove(1,M#{ 1.0 => 1})), + false = M#{ 1.0 => 1 } < id(M#{ 1 => 1}), + + %% value order + true = M#{ a => 1 } < id(M#{ a => 2}), + false = M#{ a => 2 } < id(M#{ a => 1}), + false = M#{ a => 2, b => 1 } < id(M#{ a => 1, b => 3}), + true = M#{ a => 1, b => 1 } < id(M#{ a => 1, b => 3}), + false = M#{ a => 1 } < id(M#{ a => 1.0}), + false = M#{ a => 1.0 } < id(M#{ a => 1}), + + true = M#{ "a" => "hi", b => 134 } == id(M#{ b => 134,"a" => "hi"}), + %% lists:sort SortVs = [#{"a"=>1},#{a=>2},#{1=>3},#{<<"a">>=>4}], [#{1:=ok},#{a:=ok},#{"a":=ok},#{<<"a">>:=ok}] = lists:sort([#{"a"=>ok},#{a=>ok},#{1=>ok},#{<<"a">>=>ok}]), [#{1:=3},#{a:=2},#{"a":=1},#{<<"a">>:=4}] = lists:sort(SortVs), [#{1:=3},#{a:=2},#{"a":=1},#{<<"a">>:=4}] = lists:sort(lists:reverse(SortVs)), - ok. t_map_equal(Config) when is_list(Config) -> @@ -469,27 +515,285 @@ t_map_equal(Config) when is_list(Config) -> true = id(#{ a => 1, b => 3, c => <<"wat">> }) =:= id(#{ a => 1, b => 3, c=><<"wat">>}), ok. + +t_map_compare(Config) when is_list(Config) -> + Seed = erlang:now(), + io:format("seed = ~p\n", [Seed]), + random:seed(Seed), + repeat(100, fun(_) -> float_int_compare() end, []), + repeat(100, fun(_) -> recursive_compare() end, []), + ok. + +float_int_compare() -> + Terms = numeric_keys(3), + %%io:format("Keys to use: ~p\n", [Terms]), + Pairs = lists:map(fun(K) -> list_to_tuple([{K,V} || V <- Terms]) end, Terms), + lists:foreach(fun(Size) -> + MapGen = fun() -> map_gen(list_to_tuple(Pairs), Size) end, + repeat(100, fun do_compare/1, [MapGen, MapGen]) + end, + lists:seq(1,length(Terms))), + ok. + +numeric_keys(N) -> + lists:foldl(fun(_,Acc) -> + Int = random:uniform(N*4) - N*2, + Float = float(Int), + [Int, Float, Float * 0.99, Float * 1.01 | Acc] + end, + [], + lists:seq(1,N)). + + +repeat(0, _, _) -> + ok; +repeat(N, Fun, Arg) -> + Fun(Arg), + repeat(N-1, Fun, Arg). + +copy_term(T) -> + Papa = self(), + P = spawn_link(fun() -> receive Msg -> Papa ! Msg end end), + P ! T, + receive R -> R end. + +do_compare([Gen1, Gen2]) -> + M1 = Gen1(), + M2 = Gen2(), + %%io:format("Maps to compare: ~p AND ~p\n", [M1, M2]), + C = (M1 < M2), + Erlang = maps_lessthan(M1, M2), + C = Erlang, + ?CHECK(M1==M1, M1), + + %% Change one key from int to float (or vice versa) and check compare + ML1 = maps:to_list(M1), + {K1,V1} = lists:nth(random:uniform(length(ML1)), ML1), + case K1 of + I when is_integer(I) -> + case maps:find(float(I),M1) of + error -> + M1f = maps:remove(I, maps:put(float(I), V1, M1)), + ?CHECK(M1f > M1, [M1f, M1]); + _ -> ok + end; + + F when is_float(F), round(F) == F -> + case maps:find(round(F),M1) of + error -> + M1i = maps:remove(F, maps:put(round(F), V1, M1)), + ?CHECK(M1i < M1, [M1i, M1]); + _ -> ok + end; + + _ -> ok % skip floats with decimals + end, + + ?CHECK(M2 == M2, [M2]). + + +maps_lessthan(M1, M2) -> + case {maps:size(M1),maps:size(M2)} of + {_S,_S} -> + {K1,V1} = lists:unzip(term_sort(maps:to_list(M1))), + {K2,V2} = lists:unzip(term_sort(maps:to_list(M2))), + + case erts_internal:cmp_term(K1,K2) of + -1 -> true; + 0 -> (V1 < V2); + 1 -> false + end; + + {S1, S2} -> + S1 < S2 + end. + +term_sort(L) -> + lists:sort(fun(A,B) -> erts_internal:cmp_term(A,B) =< 0 end, + L). + + +cmp(T1, T2, Exact) when is_tuple(T1) and is_tuple(T2) -> + case {size(T1),size(T2)} of + {_S,_S} -> cmp(tuple_to_list(T1), tuple_to_list(T2), Exact); + {S1,S2} when S1 < S2 -> -1; + {S1,S2} when S1 > S2 -> 1 + end; + +cmp([H1|T1], [H2|T2], Exact) -> + case cmp(H1,H2, Exact) of + 0 -> cmp(T1,T2, Exact); + C -> C + end; + +cmp(M1, M2, Exact) when is_map(M1) andalso is_map(M2) -> + cmp_maps(M1,M2,Exact); +cmp(M1, M2, Exact) -> + cmp_others(M1, M2, Exact). + +cmp_maps(M1, M2, Exact) -> + case {maps:size(M1),maps:size(M2)} of + {_S,_S} -> + {K1,V1} = lists:unzip(term_sort(maps:to_list(M1))), + {K2,V2} = lists:unzip(term_sort(maps:to_list(M2))), + + case cmp(K1, K2, true) of + 0 -> cmp(V1, V2, Exact); + C -> C + end; + + {S1,S2} when S1 < S2 -> -1; + {S1,S2} when S1 > S2 -> 1 + end. + +cmp_others(I, F, true) when is_integer(I), is_float(F) -> + -1; +cmp_others(F, I, true) when is_float(F), is_integer(I) -> + 1; +cmp_others(T1, T2, _) -> + case {T1<T2, T1==T2} of + {true,false} -> -1; + {false,true} -> 0; + {false,false} -> 1 + end. + +map_gen(Pairs, Size) -> + {_,L} = lists:foldl(fun(_, {Keys, Acc}) -> + KI = random:uniform(size(Keys)), + K = element(KI,Keys), + KV = element(random:uniform(size(K)), K), + {erlang:delete_element(KI,Keys), [KV | Acc]} + end, + {Pairs, []}, + lists:seq(1,Size)), + + maps:from_list(L). + + +recursive_compare() -> + Leafs = {atom, 17, 16.9, 17.1, [], self(), spawn(fun() -> ok end), make_ref(), make_ref()}, + {A, B} = term_gen_recursive(Leafs, 0, 0), + %%io:format("Recursive term A = ~p\n", [A]), + %%io:format("Recursive term B = ~p\n", [B]), + + ?CHECK({true,false} =:= case do_cmp(A, B, false) of + -1 -> {A<B, A>=B}; + 0 -> {A==B, A/=B}; + 1 -> {A>B, A=<B} + end, + {A,B}), + A2 = copy_term(A), + ?CHECK(A == A2, {A,A2}), + ?CHECK(0 =:= cmp(A, A2, false), {A,A2}), + + B2 = copy_term(B), + ?CHECK(B == B2, {B,B2}), + ?CHECK(0 =:= cmp(B, B2, false), {B,B2}), + ok. + +do_cmp(A, B, Exact) -> + C = cmp(A, B, Exact), + C. + +%% Generate two terms {A,B} that may only differ +%% at float vs integer types. +term_gen_recursive(Leafs, Flags, Depth) -> + MaxDepth = 10, + Rnd = case {Flags, Depth} of + {_, MaxDepth} -> % Only leafs + random:uniform(size(Leafs)) + 3; + {0, 0} -> % Only containers + random:uniform(3); + {0,_} -> % Anything + random:uniform(size(Leafs)+3) + end, + case Rnd of + 1 -> % Make map + Size = random:uniform(size(Leafs)), + lists:foldl(fun(_, {Acc1,Acc2}) -> + {K1,K2} = term_gen_recursive(Leafs, Flags, + Depth+1), + {V1,V2} = term_gen_recursive(Leafs, Flags, Depth+1), + %%ok = check_keys(K1,K2, 0), + {maps:put(K1,V1, Acc1), maps:put(K2,V2, Acc2)} + end, + {maps:new(), maps:new()}, + lists:seq(1,Size)); + 2 -> % Make cons + {Car1,Car2} = term_gen_recursive(Leafs, Flags, Depth+1), + {Cdr1,Cdr2} = term_gen_recursive(Leafs, Flags, Depth+1), + {[Car1 | Cdr1], [Car2 | Cdr2]}; + 3 -> % Make tuple + Size = random:uniform(size(Leafs)), + L = lists:map(fun(_) -> term_gen_recursive(Leafs, Flags, Depth+1) end, + lists:seq(1,Size)), + {L1, L2} = lists:unzip(L), + {list_to_tuple(L1), list_to_tuple(L2)}; + + N -> % Make leaf + case element(N-3, Leafs) of + I when is_integer(I) -> + case random:uniform(4) of + 1 -> {I, float(I)}; + 2 -> {float(I), I}; + _ -> {I,I} + end; + T -> {T,T} + end + end. + +check_keys(K1, K2, _) when K1 =:= K2 -> + case erlang:phash3(K1) =:= erlang:phash3(K2) of + true -> ok; + false -> + io:format("Same keys with different hash values !!!\nK1 = ~p\nK2 = ~p\n", [K1,K2]), + error + end; +check_keys(K1, K2, 0) -> + case {erlang:phash3(K1), erlang:phash3(K2)} of + {H,H} -> check_keys(K1, K2, 1); + {_,_} -> ok + end; +check_keys(K1, K2, L) when L < 10 -> + case {erlang:phash3([L|K1]), erlang:phash3([L|K2])} of + {H,H} -> check_keys(K1, K2, L+1); + {_,_} -> ok + end; +check_keys(K1, K2, L) -> + io:format("Same hash value at level ~p !!!\nK1 = ~p\nK2 = ~p\n", [L,K1,K2]), + error. + %% BIFs t_bif_map_get(Config) when is_list(Config) -> - + %% small map 1 = maps:get(a, #{ a=> 1}), 2 = maps:get(b, #{ a=> 1, b => 2}), "hi" = maps:get("hello", #{ a=>1, "hello" => "hi"}), "tuple hi" = maps:get({1,1.0}, #{ a=>a, {1,1.0} => "tuple hi"}), - M = id(#{ k1=>"v1", <<"k2">> => <<"v3">> }), - "v4" = maps:get(<<"k2">>, M#{ <<"k2">> => "v4" }), + M0 = id(#{ k1=>"v1", <<"k2">> => <<"v3">> }), + "v4" = maps:get(<<"k2">>, M0#{<<"k2">> => "v4"}), + + %% large map + M1 = maps:from_list([{I,I}||I<-lists:seq(1,100)] ++ + [{a,1},{b,2},{"hello","hi"},{{1,1.0},"tuple hi"}, + {k1,"v1"},{<<"k2">>,"v3"}]), + 1 = maps:get(a, M1), + 2 = maps:get(b, M1), + "hi" = maps:get("hello", M1), + "tuple hi" = maps:get({1,1.0}, M1), + "v3" = maps:get(<<"k2">>, M1), %% error case {'EXIT',{badarg, [{maps,get,_,_}|_]}} = (catch maps:get(a,[])), {'EXIT',{badarg, [{maps,get,_,_}|_]}} = (catch maps:get(a,<<>>)), {'EXIT',{bad_key,[{maps,get,_,_}|_]}} = (catch maps:get({1,1}, #{{1,1.0} => "tuple"})), {'EXIT',{bad_key,[{maps,get,_,_}|_]}} = (catch maps:get(a,#{})), - {'EXIT',{bad_key,[{maps,get,_,_}|_]}} = (catch maps:get(a,#{ b=>1, c=>2})), + {'EXIT',{bad_key,[{maps,get,_,_}|_]}} = (catch maps:get(a,#{b=>1, c=>2})), ok. t_bif_map_find(Config) when is_list(Config) -> - + %% small map {ok, 1} = maps:find(a, #{ a=> 1}), {ok, 2} = maps:find(b, #{ a=> 1, b => 2}), {ok, "int"} = maps:find(1, #{ 1 => "int"}), @@ -498,8 +802,18 @@ t_bif_map_find(Config) when is_list(Config) -> {ok, "hi"} = maps:find("hello", #{ a=>1, "hello" => "hi"}), {ok, "tuple hi"} = maps:find({1,1.0}, #{ a=>a, {1,1.0} => "tuple hi"}), - M = id(#{ k1=>"v1", <<"k2">> => <<"v3">> }), - {ok, "v4"} = maps:find(<<"k2">>, M#{ <<"k2">> => "v4" }), + M0 = id(#{ k1=>"v1", <<"k2">> => <<"v3">> }), + {ok, "v4"} = maps:find(<<"k2">>, M0#{ <<"k2">> => "v4" }), + + %% large map + M1 = maps:from_list([{I,I}||I<-lists:seq(1,100)] ++ + [{a,1},{b,2},{"hello","hi"},{{1,1.0},"tuple hi"}, + {k1,"v1"},{<<"k2">>,"v3"}]), + {ok, 1} = maps:find(a, M1), + {ok, 2} = maps:find(b, M1), + {ok, "hi"} = maps:find("hello", M1), + {ok, "tuple hi"} = maps:find({1,1.0}, M1), + {ok, "v3"} = maps:find(<<"k2">>, M1), %% error case error = maps:find(a,#{}), @@ -508,7 +822,6 @@ t_bif_map_find(Config) when is_list(Config) -> error = maps:find(1, #{ 1.0 => "float"}), error = maps:find({1.0,1}, #{ a=>a, {1,1.0} => "tuple hi"}), % reverse types in tuple key - {'EXIT',{badarg,[{maps,find,_,_}|_]}} = (catch maps:find(a,id([]))), {'EXIT',{badarg,[{maps,find,_,_}|_]}} = (catch maps:find(a,id(<<>>))), ok. @@ -542,12 +855,12 @@ t_bif_map_is_key(Config) when is_list(Config) -> t_bif_map_keys(Config) when is_list(Config) -> [] = maps:keys(#{}), - [1,2,3,4,5] = maps:keys(#{ 1 => a, 2 => b, 3 => c, 4 => d, 5 => e}), - [1,2,3,4,5] = maps:keys(#{ 4 => d, 5 => e, 1 => a, 2 => b, 3 => c}), + [1,2,3,4,5] = lists:sort(maps:keys(#{ 1 => a, 2 => b, 3 => c, 4 => d, 5 => e})), + [1,2,3,4,5] = lists:sort(maps:keys(#{ 4 => d, 5 => e, 1 => a, 2 => b, 3 => c})), % values in key order: [4,int,"hi",<<"key">>] M1 = #{ "hi" => "hello", int => 3, <<"key">> => <<"value">>, 4 => number}, - [4,int,"hi",<<"key">>] = maps:keys(M1), + [4,int,"hi",<<"key">>] = lists:sort(maps:keys(M1)), %% error case {'EXIT',{badarg,[{maps,keys,_,_}|_]}} = (catch maps:keys(1 bsl 65 + 3)), @@ -596,33 +909,33 @@ t_bif_map_put(Config) when is_list(Config) -> M1 = #{ "hi" := "hello"} = maps:put("hi", "hello", #{}), - ["hi"] = maps:keys(M1), - ["hello"] = maps:values(M1), + true = is_members(["hi"],maps:keys(M1)), + true = is_members(["hello"],maps:values(M1)), M2 = #{ int := 3 } = maps:put(int, 3, M1), - [int,"hi"] = maps:keys(M2), - [3,"hello"] = maps:values(M2), + true = is_members([int,"hi"],maps:keys(M2)), + true = is_members([3,"hello"],maps:values(M2)), M3 = #{ <<"key">> := <<"value">> } = maps:put(<<"key">>, <<"value">>, M2), - [int,"hi",<<"key">>] = maps:keys(M3), - [3,"hello",<<"value">>] = maps:values(M3), + true = is_members([int,"hi",<<"key">>],maps:keys(M3)), + true = is_members([3,"hello",<<"value">>],maps:values(M3)), M4 = #{ 18446744073709551629 := wat } = maps:put(18446744073709551629, wat, M3), - [18446744073709551629,int,"hi",<<"key">>] = maps:keys(M4), - [wat,3,"hello",<<"value">>] = maps:values(M4), + true = is_members([18446744073709551629,int,"hi",<<"key">>],maps:keys(M4)), + true = is_members([wat,3,"hello",<<"value">>],maps:values(M4)), M0 = #{ 4 := number } = M5 = maps:put(4, number, M4), - [4,18446744073709551629,int,"hi",<<"key">>] = maps:keys(M5), - [number,wat,3,"hello",<<"value">>] = maps:values(M5), + true = is_members([4,18446744073709551629,int,"hi",<<"key">>],maps:keys(M5)), + true = is_members([number,wat,3,"hello",<<"value">>],maps:values(M5)), M6 = #{ <<"key">> := <<"other value">> } = maps:put(<<"key">>, <<"other value">>, M5), - [4,18446744073709551629,int,"hi",<<"key">>] = maps:keys(M6), - [number,wat,3,"hello",<<"other value">>] = maps:values(M6), + true = is_members([4,18446744073709551629,int,"hi",<<"key">>],maps:keys(M6)), + true = is_members([number,wat,3,"hello",<<"other value">>],maps:values(M6)), %% error case {'EXIT',{badarg,[{maps,put,_,_}|_]}} = (catch maps:put(1,a,1 bsl 65 + 3)), @@ -630,7 +943,15 @@ t_bif_map_put(Config) when is_list(Config) -> {'EXIT',{badarg,[{maps,put,_,_}|_]}} = (catch maps:put(1,a,atom)), {'EXIT',{badarg,[{maps,put,_,_}|_]}} = (catch maps:put(1,a,[])), {'EXIT',{badarg,[{maps,put,_,_}|_]}} = (catch maps:put(1,a,<<>>)), - ok. + ok. + +is_members(Ks,Ls) when length(Ks) =/= length(Ls) -> false; +is_members(Ks,Ls) -> is_members_do(Ks,Ls). + +is_members_do([],[]) -> true; +is_members_do([],_) -> false; +is_members_do([K|Ks],Ls) -> + is_members_do(Ks, lists:delete(K,Ls)). t_bif_map_remove(Config) when is_list(Config) -> 0 = erlang:map_size(maps:remove(some_key, #{})), @@ -639,20 +960,20 @@ t_bif_map_remove(Config) when is_list(Config) -> 4 => number, 18446744073709551629 => wat}, M1 = maps:remove("hi", M0), - [4,18446744073709551629,int,<<"key">>] = maps:keys(M1), - [number,wat,3,<<"value">>] = maps:values(M1), + true = is_members([4,18446744073709551629,int,<<"key">>],maps:keys(M1)), + true = is_members([number,wat,3,<<"value">>],maps:values(M1)), M2 = maps:remove(int, M1), - [4,18446744073709551629,<<"key">>] = maps:keys(M2), - [number,wat,<<"value">>] = maps:values(M2), + true = is_members([4,18446744073709551629,<<"key">>],maps:keys(M2)), + true = is_members([number,wat,<<"value">>],maps:values(M2)), M3 = maps:remove(<<"key">>, M2), - [4,18446744073709551629] = maps:keys(M3), - [number,wat] = maps:values(M3), + true = is_members([4,18446744073709551629],maps:keys(M3)), + true = is_members([number,wat],maps:values(M3)), M4 = maps:remove(18446744073709551629, M3), - [4] = maps:keys(M4), - [number] = maps:values(M4), + true = is_members([4],maps:keys(M4)), + true = is_members([number],maps:values(M4)), M5 = maps:remove(4, M4), [] = maps:keys(M5), @@ -702,15 +1023,15 @@ t_bif_map_update(Config) when is_list(Config) -> t_bif_map_values(Config) when is_list(Config) -> [] = maps:values(#{}), + [1] = maps:values(#{a=>1}), - [a,b,c,d,e] = maps:values(#{ 1 => a, 2 => b, 3 => c, 4 => d, 5 => e}), - [a,b,c,d,e] = maps:values(#{ 4 => d, 5 => e, 1 => a, 2 => b, 3 => c}), + true = is_members([a,b,c,d,e],maps:values(#{ 1 => a, 2 => b, 3 => c, 4 => d, 5 => e})), + true = is_members([a,b,c,d,e],maps:values(#{ 4 => d, 5 => e, 1 => a, 2 => b, 3 => c})), - % values in key order: [4,int,"hi",<<"key">>] M1 = #{ "hi" => "hello", int => 3, <<"key">> => <<"value">>, 4 => number}, M2 = M1#{ "hi" => "hello2", <<"key">> => <<"value2">> }, - [number,3,"hello2",<<"value2">>] = maps:values(M2), - [number,3,"hello",<<"value">>] = maps:values(M1), + true = is_members([number,3,"hello2",<<"value2">>],maps:values(M2)), + true = is_members([number,3,"hello",<<"value">>],maps:values(M1)), %% error case {'EXIT',{badarg,[{maps,values,_,_}|_]}} = (catch maps:values(1 bsl 65 + 3)), @@ -730,61 +1051,61 @@ t_erlang_hash(Config) when is_list(Config) -> t_bif_erlang_phash2() -> 39679005 = erlang:phash2(#{}), - 78942764 = erlang:phash2(#{ a => 1, "a" => 2, <<"a">> => 3, {a,b} => 4 }), - 37338230 = erlang:phash2(#{ 1 => a, 2 => "a", 3 => <<"a">>, 4 => {a,b} }), - 14363616 = erlang:phash2(#{ 1 => a }), - 51612236 = erlang:phash2(#{ a => 1 }), + 33667975 = erlang:phash2(#{ a => 1, "a" => 2, <<"a">> => 3, {a,b} => 4 }), % 78942764 + 95332690 = erlang:phash2(#{ 1 => a, 2 => "a", 3 => <<"a">>, 4 => {a,b} }), % 37338230 + 108954384 = erlang:phash2(#{ 1 => a }), % 14363616 + 59617982 = erlang:phash2(#{ a => 1 }), % 51612236 - 37468437 = erlang:phash2(#{{} => <<>>}), - 44049159 = erlang:phash2(#{<<>> => {}}), + 42770201 = erlang:phash2(#{{} => <<>>}), % 37468437 + 71687700 = erlang:phash2(#{<<>> => {}}), % 44049159 M0 = #{ a => 1, "key" => <<"value">> }, M1 = maps:remove("key",M0), M2 = M1#{ "key" => <<"value">> }, - 118679416 = erlang:phash2(M0), - 51612236 = erlang:phash2(M1), - 118679416 = erlang:phash2(M2), + 70249457 = erlang:phash2(M0), % 118679416 + 59617982 = erlang:phash2(M1), % 51612236 + 70249457 = erlang:phash2(M2), % 118679416 ok. t_bif_erlang_phash() -> Sz = 1 bsl 32, - 268440612 = erlang:phash(#{},Sz), - 1196461908 = erlang:phash(#{ a => 1, "a" => 2, <<"a">> => 3, {a,b} => 4 },Sz), - 3944426064 = erlang:phash(#{ 1 => a, 2 => "a", 3 => <<"a">>, 4 => {a,b} },Sz), - 1394238263 = erlang:phash(#{ 1 => a },Sz), - 4066388227 = erlang:phash(#{ a => 1 },Sz), + 1113425985 = erlang:phash(#{},Sz), % 268440612 + 1510068139 = erlang:phash(#{ a => 1, "a" => 2, <<"a">> => 3, {a,b} => 4 },Sz), % 1196461908 + 3182345590 = erlang:phash(#{ 1 => a, 2 => "a", 3 => <<"a">>, 4 => {a,b} },Sz), % 3944426064 + 2927531828 = erlang:phash(#{ 1 => a },Sz), % 1394238263 + 1670235874 = erlang:phash(#{ a => 1 },Sz), % 4066388227 - 1578050717 = erlang:phash(#{{} => <<>>},Sz), - 1578050717 = erlang:phash(#{<<>> => {}},Sz), % yep, broken + 3935089469 = erlang:phash(#{{} => <<>>},Sz), % 1578050717 + 71692856 = erlang:phash(#{<<>> => {}},Sz), % 1578050717 M0 = #{ a => 1, "key" => <<"value">> }, M1 = maps:remove("key",M0), M2 = M1#{ "key" => <<"value">> }, - 3590546636 = erlang:phash(M0,Sz), - 4066388227 = erlang:phash(M1,Sz), - 3590546636 = erlang:phash(M2,Sz), + 2620391445 = erlang:phash(M0,Sz), % 3590546636 + 1670235874 = erlang:phash(M1,Sz), % 4066388227 + 2620391445 = erlang:phash(M2,Sz), % 3590546636 ok. t_bif_erlang_hash() -> Sz = 1 bsl 27 - 1, - 5158 = erlang:hash(#{},Sz), - 71555838 = erlang:hash(#{ a => 1, "a" => 2, <<"a">> => 3, {a,b} => 4 },Sz), - 5497225 = erlang:hash(#{ 1 => a, 2 => "a", 3 => <<"a">>, 4 => {a,b} },Sz), - 126071654 = erlang:hash(#{ 1 => a },Sz), - 126426236 = erlang:hash(#{ a => 1 },Sz), + 39684169 = erlang:hash(#{},Sz), % 5158 + 33673142 = erlang:hash(#{ a => 1, "a" => 2, <<"a">> => 3, {a,b} => 4 },Sz), % 71555838 + 95337869 = erlang:hash(#{ 1 => a, 2 => "a", 3 => <<"a">>, 4 => {a,b} },Sz), % 5497225 + 108959561 = erlang:hash(#{ 1 => a },Sz), % 126071654 + 59623150 = erlang:hash(#{ a => 1 },Sz), % 126426236 - 101655720 = erlang:hash(#{{} => <<>>},Sz), - 101655720 = erlang:hash(#{<<>> => {}},Sz), % yep, broken + 42775386 = erlang:hash(#{{} => <<>>},Sz), % 101655720 + 71692856 = erlang:hash(#{<<>> => {}},Sz), % 101655720 M0 = #{ a => 1, "key" => <<"value">> }, M1 = maps:remove("key",M0), M2 = M1#{ "key" => <<"value">> }, - 38260486 = erlang:hash(M0,Sz), - 126426236 = erlang:hash(M1,Sz), - 38260486 = erlang:hash(M2,Sz), + 70254632 = erlang:hash(M0,Sz), % 38260486 + 59623150 = erlang:hash(M1,Sz), % 126426236 + 70254632 = erlang:hash(M2,Sz), % 38260486 ok. @@ -818,12 +1139,12 @@ t_map_encode_decode(Config) when is_list(Config) -> %% literally #{ "hi" => "value", a=>33, b=>55 } in the internal order #{ a:=33, b:=55, "hi" := "value"} = erlang:binary_to_term(<<131,116,0,0,0,3, - 107,0,2,104,105, % "hi" :: list() + 107,0,2,104,105, % "hi" :: list() 107,0,5,118,97,108,117,101, % "value" :: list() - 100,0,1,97, % a :: atom() - 97,33, % 33 :: integer() - 100,0,1,98, % b :: atom() - 97,55 % 55 :: integer() + 100,0,1,97, % a :: atom() + 97,33, % 33 :: integer() + 100,0,1,98, % b :: atom() + 97,55 % 55 :: integer() >>), @@ -856,39 +1177,42 @@ t_map_encode_decode(Config) when is_list(Config) -> map_encode_decode_and_match([{K,V}|Pairs], EncodedPairs, M0) -> M1 = maps:put(K,V,M0), B0 = erlang:term_to_binary(M1), - Ls = lists:sort(fun(A,B) -> erts_internal:cmp_term(A,B) < 0 end, [{K, erlang:term_to_binary(K), erlang:term_to_binary(V)}|EncodedPairs]), - %% sort Ks and Vs according to term spec, then match it - KVbins = lists:foldr(fun({_,Kbin,Vbin}, Acc) -> [Kbin,Vbin | Acc] end, [], Ls), - ok = match_encoded_map(B0, length(Ls), KVbins), + Ls = [{erlang:term_to_binary(K), erlang:term_to_binary(V)}|EncodedPairs], + ok = match_encoded_map(B0, length(Ls), Ls), %% decode and match it M1 = erlang:binary_to_term(B0), map_encode_decode_and_match(Pairs,Ls,M1); map_encode_decode_and_match([],_,_) -> ok. match_encoded_map(<<131,116,Size:32,Encoded/binary>>,Size,Items) -> - match_encoded_map(Encoded,Items); + match_encoded_map_stripped_size(Encoded,Items,Items); match_encoded_map(_,_,_) -> no_match_size. -match_encoded_map(<<>>,[]) -> ok; -match_encoded_map(Bin,[<<131,Item/binary>>|Items]) -> - Size = erlang:byte_size(Item), - <<EncodedTerm:Size/binary, Bin1/binary>> = Bin, - EncodedTerm = Item, %% Asssert - match_encoded_map(Bin1,Items). +match_encoded_map_stripped_size(<<>>,_,_) -> ok; +match_encoded_map_stripped_size(B0,[{<<131,K/binary>>,<<131,V/binary>>}|Items],Ls) -> + Ksz = byte_size(K), + Vsz = byte_size(V), + case B0 of + <<K:Ksz/binary,V:Vsz/binary,B1/binary>> -> + match_encoded_map_stripped_size(B1,Ls,Ls); + _ -> + match_encoded_map_stripped_size(B0,Items,Ls) + end; +match_encoded_map_stripped_size(_,[],_) -> fail. t_bif_map_to_list(Config) when is_list(Config) -> [] = maps:to_list(#{}), - [{a,1},{b,2}] = maps:to_list(#{a=>1,b=>2}), - [{a,1},{b,2},{c,3}] = maps:to_list(#{c=>3,a=>1,b=>2}), - [{a,1},{b,2},{g,3}] = maps:to_list(#{g=>3,a=>1,b=>2}), - [{a,1},{b,2},{g,3},{"c",4}] = maps:to_list(#{g=>3,a=>1,b=>2,"c"=>4}), - [{3,v2},{hi,v4},{{hi,3},v5},{"hi",v3},{<<"hi">>,v1}] = maps:to_list(#{ - <<"hi">>=>v1,3=>v2,"hi"=>v3,hi=>v4,{hi,3}=>v5}), + [{a,1},{b,2}] = lists:sort(maps:to_list(#{a=>1,b=>2})), + [{a,1},{b,2},{c,3}] = lists:sort(maps:to_list(#{c=>3,a=>1,b=>2})), + [{a,1},{b,2},{g,3}] = lists:sort(maps:to_list(#{g=>3,a=>1,b=>2})), + [{a,1},{b,2},{g,3},{"c",4}] = lists:sort(maps:to_list(#{g=>3,a=>1,b=>2,"c"=>4})), + [{3,v2},{hi,v4},{{hi,3},v5},{"hi",v3},{<<"hi">>,v1}] = + lists:sort(maps:to_list(#{<<"hi">>=>v1,3=>v2,"hi"=>v3,hi=>v4,{hi,3}=>v5})), - [{3,v7},{hi,v9},{{hi,3},v10},{"hi",v8},{<<"hi">>,v6}] = maps:to_list(#{ - <<"hi">>=>v1,3=>v2,"hi"=>v3,hi=>v4,{hi,3}=>v5, - <<"hi">>=>v6,3=>v7,"hi"=>v8,hi=>v9,{hi,3}=>v10}), + [{3,v7},{hi,v9},{{hi,3},v10},{"hi",v8},{<<"hi">>,v6}] = + lists:sort(maps:to_list(#{<<"hi">>=>v1,3=>v2,"hi"=>v3,hi=>v4,{hi,3}=>v5, + <<"hi">>=>v6,3=>v7,"hi"=>v8,hi=>v9,{hi,3}=>v10})), %% error cases {'EXIT', {badarg,_}} = (catch maps:to_list(id(a))), @@ -901,7 +1225,7 @@ t_bif_map_from_list(Config) when is_list(Config) -> A = maps:from_list([]), 0 = erlang:map_size(A), - #{a:=1,b:=2} = maps:from_list([{a,1},{b,2}]), + #{a:=1,b:=2} = maps:from_list([{a,1},{b,2}]), #{c:=3,a:=1,b:=2} = maps:from_list([{a,1},{b,2},{c,3}]), #{g:=3,a:=1,b:=2} = maps:from_list([{a,1},{b,2},{g,3}]), @@ -923,6 +1247,136 @@ t_bif_map_from_list(Config) when is_list(Config) -> {'EXIT', {badarg,_}} = (catch maps:from_list(id(42))), ok. +t_bif_build_and_check(Config) when is_list(Config) -> + ok = check_build_and_remove(750,[ + fun(K) -> [K,K] end, + fun(K) -> [float(K),K] end, + fun(K) -> K end, + fun(K) -> {1,K} end, + fun(K) -> {K} end, + fun(K) -> [K|K] end, + fun(K) -> [K,1,2,3,4] end, + fun(K) -> {K,atom} end, + fun(K) -> float(K) end, + fun(K) -> integer_to_list(K) end, + fun(K) -> list_to_atom(integer_to_list(K)) end, + fun(K) -> [K,{K,[K,{K,[K]}]}] end, + fun(K) -> <<K:32>> end + ]), + + ok. + +check_build_and_remove(_,[]) -> ok; +check_build_and_remove(N,[F|Fs]) -> + {M,Ks} = build_and_check(N, maps:new(), F, []), + ok = remove_and_check(Ks,M), + check_build_and_remove(N,Fs). + +build_and_check(0, M0, _, Ks) -> {M0, Ks}; +build_and_check(N, M0, F, Ks) -> + K = build_key(F,N), + M1 = maps:put(K,K,M0), + ok = check_keys_exist([K|Ks], M1), + M2 = maps:update(K,v,M1), + v = maps:get(K,M2), + build_and_check(N-1,M1,F,[K|Ks]). + +remove_and_check([],_) -> ok; +remove_and_check([K|Ks], M0) -> + K = maps:get(K,M0), + true = maps:is_key(K,M0), + M1 = maps:remove(K,M0), + false = maps:is_key(K,M1), + true = maps:is_key(K,M0), + ok = check_keys_exist(Ks,M1), + error = maps:find(K,M1), + remove_and_check(Ks, M1). + +build_key(F,N) when N rem 3 =:= 0 -> F(N); +build_key(F,N) when N rem 3 =:= 1 -> K = F(N), {K,K}; +build_key(F,N) when N rem 3 =:= 2 -> K = F(N), [K,K]. + +check_keys_exist([], _) -> ok; +check_keys_exist([K|Ks],M) -> + true = maps:is_key(K,M), + check_keys_exist(Ks,M). + +t_bif_merge_and_check(Config) when is_list(Config) -> + %% simple disjunct ones + %% make sure all keys are unique + Kss = [[a,b,c,d], + [1,2,3,4], + [], + ["hi"], + [e], + [build_key(fun(K) -> {small,K} end, I) || I <- lists:seq(1,32)], + lists:seq(5, 28), + lists:seq(29, 59), + [build_key(fun(K) -> integer_to_list(K) end, I) || I <- lists:seq(2000,10000)], + [build_key(fun(K) -> <<K:32>> end, I) || I <- lists:seq(1,80)], + [build_key(fun(K) -> {<<K:32>>} end, I) || I <- lists:seq(100,1000)]], + + + KsMs = build_keys_map_pairs(Kss), + Cs = [{CKs1,CM1,CKs2,CM2} || {CKs1,CM1} <- KsMs, {CKs2,CM2} <- KsMs], + ok = merge_and_check_combo(Cs), + + %% overlapping ones + + KVs1 = [{a,1},{b,2},{c,3}], + KVs2 = [{b,3},{c,4},{d,5}], + KVs = [{I,I} || I <- lists:seq(1,32)], + KVs3 = KVs1 ++ KVs, + KVs4 = KVs2 ++ KVs, + + M1 = maps:from_list(KVs1), + M2 = maps:from_list(KVs2), + M3 = maps:from_list(KVs3), + M4 = maps:from_list(KVs4), + + M12 = maps:merge(M1,M2), + ok = check_key_values(KVs2 ++ [{a,1}], M12), + M21 = maps:merge(M2,M1), + ok = check_key_values(KVs1 ++ [{d,5}], M21), + + M34 = maps:merge(M3,M4), + ok = check_key_values(KVs4 ++ [{a,1}], M34), + M43 = maps:merge(M4,M3), + ok = check_key_values(KVs3 ++ [{d,5}], M43), + + M14 = maps:merge(M1,M4), + ok = check_key_values(KVs4 ++ [{a,1}], M14), + M41 = maps:merge(M4,M1), + ok = check_key_values(KVs1 ++ [{d,5}] ++ KVs, M41), + + ok. + +check_key_values([],_) -> ok; +check_key_values([{K,V}|KVs],M) -> + V = maps:get(K,M), + check_key_values(KVs,M). + +merge_and_check_combo([]) -> ok; +merge_and_check_combo([{Ks1,M1,Ks2,M2}|Cs]) -> + M12 = maps:merge(M1,M2), + ok = check_keys_exist(Ks1 ++ Ks2, M12), + M21 = maps:merge(M2,M1), + ok = check_keys_exist(Ks1 ++ Ks2, M21), + + true = M12 =:= M21, + M12 = M21, + + merge_and_check_combo(Cs). + +build_keys_map_pairs([]) -> []; +build_keys_map_pairs([Ks|Kss]) -> + M = maps:from_list(keys_to_pairs(Ks)), + ok = check_keys_exist(Ks, M), + [{Ks,M}|build_keys_map_pairs(Kss)]. + +keys_to_pairs(Ks) -> [{K,K} || K <- Ks]. + + %% Maps module, not BIFs t_maps_fold(_Config) -> Vs = lists:seq(1,100), diff --git a/erts/emulator/test/match_spec_SUITE.erl b/erts/emulator/test/match_spec_SUITE.erl index b231c2bbd9..d3c884689f 100644 --- a/erts/emulator/test/match_spec_SUITE.erl +++ b/erts/emulator/test/match_spec_SUITE.erl @@ -924,6 +924,7 @@ maps(Config) when is_list(Config) -> table), {ok,#{foo := 3},[],[]} = erlang:match_spec_test({}, [{{},[],[#{foo => {'+',1,2}}]}], table), + {ok,"camembert",[],[]} = erlang:match_spec_test(#{b => "camembert",c => "cabécou"}, [{#{b => '$1',c => "cabécou"},[],['$1']}], table), @@ -932,8 +933,42 @@ maps(Config) when is_list(Config) -> erlang:match_spec_test(#{<<"b">> =>"camembert","c"=>"cabécou", "wat"=>"hi", b=><<"other">>}, [{#{<<"b">> => '$1',"wat" => '$2'},[],[#{a=>'$1',b=>'$2'}]}], table), + %% large maps + + Ls0 = [{I,<<I:32>>}||I <- lists:seq(1,415)], + M0 = maps:from_list(Ls0), + M1 = #{a=>1,b=>2,c=>3,d=>4}, + + R1 = M0#{263 := #{ a=> 3 }}, + Ms1 = [{M1#{c:='$1'},[],[M0#{263 := #{a => '$1'}}]}], + + {ok,R1,[],[]} = erlang:match_spec_test(M1,Ms1,table), + + Ms2 = [{M0#{63:='$1', 19:='$2'},[],[M0#{19:='$1', 63:='$2'}]}], + R2 = M0#{63 := maps:get(19,M0), 19 := maps:get(63,M0) }, + {ok,R2,[],[]} = erlang:match_spec_test(M0,Ms2,table), + + ok = maps_check_loop(M1), + ok = maps_check_loop(M0), + M2 = maps:from_list([{integer_to_list(K),V} || {K,V} <- Ls0]), + ok = maps_check_loop(M2), ok. +maps_check_loop(M) -> + Ks = maps:keys(M), + maps_check_loop(M,M,M,M,Ks,lists:reverse(Ks),1). + +maps_check_loop(Orig,M0,MsM0,Rm0,[K|Ks],[Rk|Rks],Ix) -> + MsK = list_to_atom([$$]++integer_to_list(Ix)), + MsM1 = MsM0#{K := MsK}, + Rm1 = Rm0#{Rk := MsK}, + M1 = M0#{Rk := maps:get(K,MsM0)}, + Ms = [{MsM1,[],[Rm1]}], + {ok,M1,[],[]} = erlang:match_spec_test(Orig,Ms,table), + maps_check_loop(Orig,M1,MsM1,Rm1,Ks,Rks,Ix+1); +maps_check_loop(_,_,_,_,[],[],_) -> ok. + + empty_list(Config) when is_list(Config) -> Val=[{'$1',[], [{message,'$1'},{message,{caller}},{return_trace}]}], %% Did crash debug VM in faulty assert: diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl index 4560077a51..b0624fb8c1 100644 --- a/erts/emulator/test/nif_SUITE.erl +++ b/erts/emulator/test/nif_SUITE.erl @@ -451,7 +451,7 @@ maps(Config) when is_list(Config) -> M = maps_from_list_nif(Pairs), R = {RIs,Is} = sorted_list_from_maps_nif(M), io:format("Pairs: ~p~nMap: ~p~nReturned: ~p~n", [lists:sort(Pairs),M,R]), - Is = lists:sort(Pairs), + true = (lists:sort(Is) =:= lists:sort(Pairs)), Is = lists:reverse(RIs), #{} = maps_from_list_nif([]), diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c index 85544db2ab..7b49f23d0a 100644 --- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c +++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c @@ -1744,6 +1744,7 @@ static ERL_NIF_TERM sorted_list_from_maps_nif(ErlNifEnv* env, int argc, const ER list_b = enif_make_list_cell(env, enif_make_tuple2(env, key, value), list_b); prev_ret = enif_map_iterator_prev(env,&iter_b); + cnt++; } if (cnt) { |