From e5899f39d043409e2a48f34c845ad28cad8a28e6 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 18 May 2015 19:25:09 +0200 Subject: erts: Fix warning about const pointer to make_boxed and make_list --- erts/emulator/beam/erl_term.c | 4 ++-- erts/emulator/beam/erl_term.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/erl_term.c b/erts/emulator/beam/erl_term.c index bc04d7b78e..565528193e 100644 --- a/erts/emulator/beam/erl_term.c +++ b/erts/emulator/beam/erl_term.c @@ -128,10 +128,10 @@ FUNTY checked_##FUN(ARGTY x, const char *file, unsigned line) \ return _unchecked_##FUN(x); \ } -ET_DEFINE_CHECKED(Eterm,make_boxed,Eterm*,_is_taggable_pointer); +ET_DEFINE_CHECKED(Eterm,make_boxed,const Eterm*,_is_taggable_pointer); ET_DEFINE_CHECKED(int,is_boxed,Eterm,!is_header); ET_DEFINE_CHECKED(Eterm*,boxed_val,Wterm,_boxed_precond); -ET_DEFINE_CHECKED(Eterm,make_list,Eterm*,_is_taggable_pointer); +ET_DEFINE_CHECKED(Eterm,make_list,const Eterm*,_is_taggable_pointer); ET_DEFINE_CHECKED(int,is_not_list,Eterm,!is_header); ET_DEFINE_CHECKED(Eterm*,list_val,Wterm,_list_precond); ET_DEFINE_CHECKED(Uint,unsigned_val,Eterm,is_small); diff --git a/erts/emulator/beam/erl_term.h b/erts/emulator/beam/erl_term.h index 602aab46dc..7b15b34da1 100644 --- a/erts/emulator/beam/erl_term.h +++ b/erts/emulator/beam/erl_term.h @@ -198,7 +198,7 @@ struct erl_node_; /* Declared in erl_node_tables.h */ #endif #define _is_aligned(x) (((Uint)(x) & 0x3) == 0) #define _unchecked_make_boxed(x) ((Uint) COMPRESS_POINTER(x) + TAG_PRIMARY_BOXED) -_ET_DECLARE_CHECKED(Eterm,make_boxed,Eterm*) +_ET_DECLARE_CHECKED(Eterm,make_boxed,const Eterm*) #define make_boxed(x) _ET_APPLY(make_boxed,(x)) #if 1 #define _is_not_boxed(x) ((x) & (_TAG_PRIMARY_MASK-TAG_PRIMARY_BOXED)) @@ -214,7 +214,7 @@ _ET_DECLARE_CHECKED(Eterm*,boxed_val,Wterm) /* cons cell ("list") access methods */ #define _unchecked_make_list(x) ((Uint) COMPRESS_POINTER(x) + TAG_PRIMARY_LIST) -_ET_DECLARE_CHECKED(Eterm,make_list,Eterm*) +_ET_DECLARE_CHECKED(Eterm,make_list,const Eterm*) #define make_list(x) _ET_APPLY(make_list,(x)) #if 1 #define _unchecked_is_not_list(x) ((x) & (_TAG_PRIMARY_MASK-TAG_PRIMARY_LIST)) -- cgit v1.2.3 From 56c9154f2a0efb8a6d1347b19b797ac17b6608d4 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 18 May 2015 19:34:45 +0200 Subject: erts: Fix calculation of reclaimed data during full gc The old code did not take take the old-heap into acount. --- erts/emulator/beam/erl_gc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index 1785fc27be..14e7dde778 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -1223,7 +1223,8 @@ major_collection(Process* p, int need, Eterm* objv, int nobj, Uint *recl) Uint new_sz; Uint fragments = MBUF_SIZE(p) + combined_message_size(p); - size_before = fragments + (HEAP_TOP(p) - HEAP_START(p)); + size_before = fragments + (HEAP_TOP(p) - HEAP_START(p)) + + (OLD_HTOP(p) - OLD_HEAP(p)); /* * Do a fullsweep GC. First figure out the size of the heap -- cgit v1.2.3 From 957f619382923be72835500f56e75d8bbe553892 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Tue, 26 May 2015 20:12:08 +0200 Subject: erts: Fix magic binary alignment on 32-bit Caused bus error on 32-bit sparc from unaligned 64-bit word in binary_to_term trap context. Also add _UNALIGNED_ magic macros to avoid double alignment padding in NIF resources. --- erts/emulator/beam/erl_binary.h | 18 ++++++++++++++--- erts/emulator/beam/erl_bits.c | 8 ++++++++ erts/emulator/beam/erl_nif.c | 26 +++++++++++++++--------- erts/emulator/beam/global.h | 45 +++++++++++++++++++++++++++++++++-------- 4 files changed, 77 insertions(+), 20 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/erl_binary.h b/erts/emulator/beam/erl_binary.h index 8d264d166e..6b96787d40 100644 --- a/erts/emulator/beam/erl_binary.h +++ b/erts/emulator/beam/erl_binary.h @@ -194,6 +194,9 @@ ERTS_GLB_INLINE Binary *erts_bin_nrml_alloc(Uint size); ERTS_GLB_INLINE Binary *erts_bin_realloc_fnf(Binary *bp, Uint size); ERTS_GLB_INLINE Binary *erts_bin_realloc(Binary *bp, Uint size); ERTS_GLB_INLINE void erts_bin_free(Binary *bp); +ERTS_GLB_INLINE Binary *erts_create_magic_binary_x(Uint size, + void (*destructor)(Binary *), + int unaligned); ERTS_GLB_INLINE Binary *erts_create_magic_binary(Uint size, void (*destructor)(Binary *)); @@ -332,21 +335,30 @@ erts_bin_free(Binary *bp) } ERTS_GLB_INLINE Binary * -erts_create_magic_binary(Uint size, void (*destructor)(Binary *)) +erts_create_magic_binary_x(Uint size, void (*destructor)(Binary *), + int unaligned) { - Uint bsize = ERTS_MAGIC_BIN_SIZE(size); + Uint bsize = unaligned ? ERTS_MAGIC_BIN_UNALIGNED_SIZE(size) + : ERTS_MAGIC_BIN_SIZE(size); Binary* bptr = erts_alloc_fnf(ERTS_ALC_T_BINARY, bsize); ASSERT(bsize > size); if (!bptr) erts_alloc_n_enomem(ERTS_ALC_T2N(ERTS_ALC_T_BINARY), bsize); ERTS_CHK_BIN_ALIGNMENT(bptr); bptr->flags = BIN_FLAG_MAGIC; - bptr->orig_size = ERTS_MAGIC_BIN_ORIG_SIZE(size); + bptr->orig_size = unaligned ? ERTS_MAGIC_BIN_UNALIGNED_ORIG_SIZE(size) + : ERTS_MAGIC_BIN_ORIG_SIZE(size); erts_refc_init(&bptr->refc, 0); ERTS_MAGIC_BIN_DESTRUCTOR(bptr) = destructor; return bptr; } +ERTS_GLB_INLINE Binary * +erts_create_magic_binary(Uint size, void (*destructor)(Binary *)) +{ + return erts_create_magic_binary_x(size, destructor, 0); +} + #endif /* #if ERTS_GLB_INLINE_INCL_FUNC_DEF */ #endif /* !__ERL_BINARY_H */ diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c index b8ae93fa58..2e29bf8895 100644 --- a/erts/emulator/beam/erl_bits.c +++ b/erts/emulator/beam/erl_bits.c @@ -107,6 +107,14 @@ erts_bits_destroy_state(ERL_BITS_PROTO_0) void erts_init_bits(void) { + ERTS_CT_ASSERT(offsetof(Binary,orig_bytes) % 8 == 0); + ERTS_CT_ASSERT(offsetof(ErtsMagicBinary,u.aligned.data) % 8 == 0); + ERTS_CT_ASSERT(ERTS_MAGIC_BIN_BYTES_TO_ALIGN == + (offsetof(ErtsMagicBinary,u.aligned.data) + - offsetof(ErtsMagicBinary,u.unaligned.data))); + ERTS_CT_ASSERT(offsetof(ErtsBinary,driver.binary.orig_bytes) + == offsetof(Binary,orig_bytes)); + erts_smp_atomic_init_nob(&bits_bufs_size, 0); #if defined(ERTS_SMP) /* erl_process.c calls erts_bits_init_state() on all state instances */ diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index 426a00304e..f42ccf23c2 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -1199,7 +1199,11 @@ typedef struct enif_resource_t struct enif_resource_type_t* type; #ifdef DEBUG erts_refc_t nif_refc; +# ifdef ARCH_32 + byte align__[4]; +# endif #endif + char data[1]; }ErlNifResource; @@ -1375,7 +1379,7 @@ static void rollback_opened_resource_types(void) static void nif_resource_dtor(Binary* bin) { - ErlNifResource* resource = (ErlNifResource*) ERTS_MAGIC_BIN_DATA(bin); + ErlNifResource* resource = (ErlNifResource*) ERTS_MAGIC_BIN_UNALIGNED_DATA(bin); ErlNifResourceType* type = resource->type; ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(bin) == &nif_resource_dtor); @@ -1396,8 +1400,10 @@ static void nif_resource_dtor(Binary* bin) void* enif_alloc_resource(ErlNifResourceType* type, size_t size) { - Binary* bin = erts_create_magic_binary(SIZEOF_ErlNifResource(size), &nif_resource_dtor); - ErlNifResource* resource = ERTS_MAGIC_BIN_DATA(bin); + Binary* bin = erts_create_magic_binary_x(SIZEOF_ErlNifResource(size), + &nif_resource_dtor, + 1); /* unaligned */ + ErlNifResource* resource = ERTS_MAGIC_BIN_UNALIGNED_DATA(bin); ASSERT(type->owner && type->next && type->prev); /* not allowed in load/upgrade */ resource->type = type; @@ -1412,7 +1418,7 @@ void* enif_alloc_resource(ErlNifResourceType* type, size_t size) void enif_release_resource(void* obj) { ErlNifResource* resource = DATA_TO_RESOURCE(obj); - ErtsBinary* bin = ERTS_MAGIC_BIN_FROM_DATA(resource); + ErtsBinary* bin = ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource); ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(bin) == &nif_resource_dtor); #ifdef DEBUG @@ -1426,7 +1432,7 @@ void enif_release_resource(void* obj) void enif_keep_resource(void* obj) { ErlNifResource* resource = DATA_TO_RESOURCE(obj); - ErtsBinary* bin = ERTS_MAGIC_BIN_FROM_DATA(resource); + ErtsBinary* bin = ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource); ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(bin) == &nif_resource_dtor); #ifdef DEBUG @@ -1438,7 +1444,7 @@ void enif_keep_resource(void* obj) ERL_NIF_TERM enif_make_resource(ErlNifEnv* env, void* obj) { ErlNifResource* resource = DATA_TO_RESOURCE(obj); - ErtsBinary* bin = ERTS_MAGIC_BIN_FROM_DATA(resource); + ErtsBinary* bin = ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource); Eterm* hp = alloc_heap(env,PROC_BIN_SIZE); return erts_mk_magic_binary_term(&hp, &MSO(env->proc), &bin->binary); } @@ -1467,7 +1473,7 @@ int enif_get_resource(ErlNifEnv* env, ERL_NIF_TERM term, ErlNifResourceType* typ return 0; / * Or should we allow "resource binaries" as handles? * / }*/ mbin = pb->val; - resource = (ErlNifResource*) ERTS_MAGIC_BIN_DATA(mbin); + resource = (ErlNifResource*) ERTS_MAGIC_BIN_UNALIGNED_DATA(mbin); if (ERTS_MAGIC_BIN_DESTRUCTOR(mbin) != &nif_resource_dtor || resource->type != type) { return 0; @@ -1479,8 +1485,8 @@ int enif_get_resource(ErlNifEnv* env, ERL_NIF_TERM term, ErlNifResourceType* typ size_t enif_sizeof_resource(void* obj) { ErlNifResource* resource = DATA_TO_RESOURCE(obj); - Binary* bin = &ERTS_MAGIC_BIN_FROM_DATA(resource)->binary; - return ERTS_MAGIC_BIN_DATA_SIZE(bin) - offsetof(ErlNifResource,data); + Binary* bin = &ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource)->binary; + return ERTS_MAGIC_BIN_UNALIGNED_DATA_SIZE(bin) - offsetof(ErlNifResource,data); } @@ -2689,6 +2695,8 @@ erts_unload_nif(struct erl_module_nif* lib) void erl_nif_init() { + ERTS_CT_ASSERT((offsetof(ErlNifResource,data) % 8) == ERTS_MAGIC_BIN_BYTES_TO_ALIGN); + resource_type_list.next = &resource_type_list; resource_type_list.prev = &resource_type_list; resource_type_list.dtor = NULL; diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 340c7033ab..ee1f70b748 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -230,9 +230,23 @@ typedef struct { ERTS_INTERNAL_BINARY_FIELDS SWord orig_size; void (*destructor)(Binary *); - char magic_bin_data[1]; + union { + struct { + ERTS_BINARY_STRUCT_ALIGNMENT + char data[1]; + } aligned; + struct { + char data[1]; + } unaligned; + } u; } ErtsMagicBinary; +#ifdef ARCH_32 +#define ERTS_MAGIC_BIN_BYTES_TO_ALIGN 4 +#else +#define ERTS_MAGIC_BIN_BYTES_TO_ALIGN 0 +#endif + typedef union { Binary binary; ErtsMagicBinary magic_binary; @@ -252,15 +266,30 @@ typedef union { #define ERTS_MAGIC_BIN_DESTRUCTOR(BP) \ ((ErtsBinary *) (BP))->magic_binary.destructor #define ERTS_MAGIC_BIN_DATA(BP) \ - ((void *) ((ErtsBinary *) (BP))->magic_binary.magic_bin_data) -#define ERTS_MAGIC_BIN_DATA_SIZE(BP) \ - ((BP)->orig_size - sizeof(void (*)(Binary *))) + ((void *) ((ErtsBinary *) (BP))->magic_binary.u.aligned.data) +#define ERTS_MAGIC_DATA_OFFSET \ + (offsetof(ErtsMagicBinary,u.aligned.data) - offsetof(Binary,orig_bytes)) #define ERTS_MAGIC_BIN_ORIG_SIZE(Sz) \ - (sizeof(void (*)(Binary *)) + (Sz)) + (ERTS_MAGIC_DATA_OFFSET + (Sz)) #define ERTS_MAGIC_BIN_SIZE(Sz) \ - (offsetof(ErtsMagicBinary,magic_bin_data) + (Sz)) -#define ERTS_MAGIC_BIN_FROM_DATA(DATA) \ - ((ErtsBinary*)((char*)(DATA) - offsetof(ErtsMagicBinary,magic_bin_data))) + (offsetof(ErtsMagicBinary,u.aligned.data) + (Sz)) + +/* On 32-bit arch these macro variants will save memory + by not forcing 8-byte alignment for the magic payload. +*/ +#define ERTS_MAGIC_BIN_UNALIGNED_DATA(BP) \ + ((void *) ((ErtsBinary *) (BP))->magic_binary.u.unaligned.data) +#define ERTS_MAGIC_UNALIGNED_DATA_OFFSET \ + (offsetof(ErtsMagicBinary,u.unaligned.data) - offsetof(Binary,orig_bytes)) +#define ERTS_MAGIC_BIN_UNALIGNED_DATA_SIZE(BP) \ + ((BP)->orig_size - ERTS_MAGIC_UNALIGNED_DATA_OFFSET) +#define ERTS_MAGIC_BIN_UNALIGNED_ORIG_SIZE(Sz) \ + (ERTS_MAGIC_UNALIGNED_DATA_OFFSET + (Sz)) +#define ERTS_MAGIC_BIN_UNALIGNED_SIZE(Sz) \ + (offsetof(ErtsMagicBinary,u.unaligned.data) + (Sz)) +#define ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(DATA) \ + ((ErtsBinary*)((char*)(DATA) - offsetof(ErtsMagicBinary,u.unaligned.data))) + #define Binary2ErlDrvBinary(B) (&((ErtsBinary *) (B))->driver.binary) #define ErlDrvBinary2Binary(D) ((Binary *) \ -- cgit v1.2.3 From 512c321bdaf25b685124405e287a3839be467149 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Tue, 2 Jun 2015 19:23:25 +0200 Subject: erts: Add save/restore for PSTACK --- erts/emulator/beam/global.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index ee1f70b748..07806c823c 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -777,6 +777,15 @@ ErtsPStack s = { (byte*)PSTK_DEF_STACK(s), /* pstart */ \ ERTS_ALC_T_ESTACK /* alloc_type */ \ } +#define PSTACK_CHANGE_ALLOCATOR(s,t) \ +do { \ + if (s.pstart != (byte*)PSTK_DEF_STACK(s)) { \ + erl_exit(1, "Internal error - trying to change allocator " \ + "type of active pstack\n"); \ + } \ + s.alloc_type = (t); \ + } while (0) + #define PSTACK_DESTROY(s) \ do { \ if (s.pstart != (byte*)PSTK_DEF_STACK(s)) { \ @@ -786,6 +795,8 @@ do { \ #define PSTACK_IS_EMPTY(s) (s.psp < s.pstart) +#define PSTACK_COUNT(s) (((PSTACK_TYPE*)s.psp + 1) - (PSTACK_TYPE*)s.pstart) + #define PSTACK_TOP(s) (ASSERT(!PSTACK_IS_EMPTY(s)), (PSTACK_TYPE*)(s.psp)) #define PSTACK_PUSH(s) \ @@ -796,6 +807,37 @@ do { \ #define PSTACK_POP(s) ((PSTACK_TYPE*) (s.psp -= sizeof(PSTACK_TYPE))) +/* + * Do not free the stack after this, it may have pointers into what + * was saved in 'dst'. + */ +#define PSTACK_SAVE(s,dst)\ +do {\ + if (s.pstart == (byte*)PSTK_DEF_STACK(s)) {\ + UWord _pbytes = PSTACK_COUNT(s) * sizeof(PSTACK_TYPE);\ + (dst)->pstart = erts_alloc(s.alloc_type,\ + sizeof(PSTK_DEF_STACK(s)));\ + memcpy((dst)->pstart, s.pstart, _pbytes);\ + (dst)->psp = (dst)->pstart + _pbytes - sizeof(PSTACK_TYPE);\ + (dst)->pend = (dst)->pstart + sizeof(PSTK_DEF_STACK(s));\ + (dst)->alloc_type = s.alloc_type;\ + } else\ + *(dst) = s;\ + } while (0) + +/* + * Use on empty stack, only the allocator can be changed before this. + * The src stack is reset to NULL. + */ +#define PSTACK_RESTORE(s, src) \ +do { \ + ASSERT(s.pstart == (byte*)PSTK_DEF_STACK(s)); \ + s = *(src); /* struct copy */ \ + (src)->pstart = NULL; \ + ASSERT(s.psp >= (s.pstart - sizeof(PSTACK_TYPE))); \ + ASSERT(s.psp < s.pend); \ +} while (0) + /* binary.c */ -- cgit v1.2.3 From eea59350ddc1f8c2d86e10f5d38f0cda2f9081f3 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 8 Jun 2015 14:52:41 +0200 Subject: erts: Refactor arg swapping for maps:merge --- erts/emulator/beam/erl_map.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c index a1bd39dbc8..5802ec76ba 100644 --- a/erts/emulator/beam/erl_map.c +++ b/erts/emulator/beam/erl_map.c @@ -80,7 +80,7 @@ typedef struct { static Eterm flatmap_merge(Process *p, Eterm nodeA, Eterm nodeB); static Eterm map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args); -static Eterm hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB); +static Eterm hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB, int swap_args); static Eterm hashmap_to_list(Process *p, Eterm map); static Eterm hashmap_keys(Process *p, Eterm map); static Eterm hashmap_values(Process *p, Eterm map); @@ -947,7 +947,7 @@ BIF_RETTYPE maps_merge_2(BIF_ALIST_2) { BIF_P->fvalue = BIF_ARG_2; } else if (is_hashmap(BIF_ARG_1)) { if (is_hashmap(BIF_ARG_2)) { - BIF_RET(hashmap_merge(BIF_P, BIF_ARG_1, BIF_ARG_2)); + BIF_RET(hashmap_merge(BIF_P, BIF_ARG_1, BIF_ARG_2, 0)); } else if (is_flatmap(BIF_ARG_2)) { /* Will always become a tree */ BIF_RET(map_merge_mixed(BIF_P, BIF_ARG_2, BIF_ARG_1, 1)); @@ -1113,12 +1113,12 @@ static Eterm map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args) erts_free(ERTS_ALC_T_TMP, (void *) hxns); ERTS_VERIFY_UNUSED_TEMP_ALLOC(p); - return swap_args ? hashmap_merge(p, tree, res) : hashmap_merge(p, res, tree); + return hashmap_merge(p, res, tree, swap_args); } #define HALLOC_EXTRA 200 -static Eterm hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB) { +static Eterm hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB, int swap_args) { #define PSTACK_TYPE struct HashmapMergePStackType struct HashmapMergePStackType { Eterm *srcA, *srcB; @@ -1133,7 +1133,7 @@ static Eterm hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB) { Eterm hdrA, hdrB; Uint32 ahx, bhx; Uint size; /* total key-value counter */ - int keepA = 0; + int keepA = swap_args; unsigned int lvl = 0; DeclareTmpHeap(th,2,p); Eterm res = THE_NON_VALUE; -- cgit v1.2.3 From c09561dc51be736943a32862a41a3eaef2e41b83 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Wed, 3 Jun 2015 16:19:30 +0200 Subject: erts: Yield in maps:merge --- erts/emulator/beam/atom.names | 1 + erts/emulator/beam/erl_init.c | 1 + erts/emulator/beam/erl_map.c | 255 ++++++++++++++++++++++++++++++------------ erts/emulator/beam/global.h | 13 ++- 4 files changed, 196 insertions(+), 74 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 5ec1409adf..74b42c647e 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -350,6 +350,7 @@ atom message atom message_binary atom message_queue_len atom messages +atom merge_trap atom meta atom meta_match_spec atom micro_seconds diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 988ff0e2b5..98ab9f598f 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -379,6 +379,7 @@ erl_init(int ncpu, erts_init_bif_re(); erts_init_unicode(); /* after RE to get access to PCRE unicode */ erts_init_external(); + erts_init_map(); erts_delay_trap = erts_export_put(am_erlang, am_delay_trap, 2); erts_late_init_process(); #if HAVE_ERTS_MSEG diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c index 5802ec76ba..c1ad5658d9 100644 --- a/erts/emulator/beam/erl_map.c +++ b/erts/emulator/beam/erl_map.c @@ -32,6 +32,7 @@ #include "erl_process.h" #include "error.h" #include "bif.h" +#include "erl_binary.h" #include "erl_map.h" @@ -79,8 +80,12 @@ typedef struct { static Eterm flatmap_merge(Process *p, Eterm nodeA, Eterm nodeB); -static Eterm map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args); -static Eterm hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB, int swap_args); +static BIF_RETTYPE map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args); +struct HashmapMergeContext_; +static BIF_RETTYPE hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB, int swap_args, + struct HashmapMergeContext_*); +static Export hashmap_merge_trap_export; +static BIF_RETTYPE maps_merge_trap_1(BIF_ALIST_1); static Eterm hashmap_to_list(Process *p, Eterm map); static Eterm hashmap_keys(Process *p, Eterm map); static Eterm hashmap_values(Process *p, Eterm map); @@ -95,6 +100,15 @@ static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[]); static int hxnodecmp(hxnode_t* a, hxnode_t* b); static int hxnodecmpkey(hxnode_t* a, hxnode_t* b); + +void erts_init_map(void) { + erts_init_trap_export(&hashmap_merge_trap_export, + am_maps, am_merge_trap, 1, + &maps_merge_trap_1); + return; +} + + /* erlang:map_size/1 * the corresponding instruction is implemented in: * beam/erl_bif_guard.c @@ -942,15 +956,15 @@ BIF_RETTYPE maps_merge_2(BIF_ALIST_2) { BIF_RET(flatmap_merge(BIF_P, BIF_ARG_1, BIF_ARG_2)); } else if (is_hashmap(BIF_ARG_2)) { /* Will always become a tree */ - BIF_RET(map_merge_mixed(BIF_P, BIF_ARG_1, BIF_ARG_2, 0)); + return map_merge_mixed(BIF_P, BIF_ARG_1, BIF_ARG_2, 0); } BIF_P->fvalue = BIF_ARG_2; } else if (is_hashmap(BIF_ARG_1)) { if (is_hashmap(BIF_ARG_2)) { - BIF_RET(hashmap_merge(BIF_P, BIF_ARG_1, BIF_ARG_2, 0)); + return hashmap_merge(BIF_P, BIF_ARG_1, BIF_ARG_2, 0, NULL); } else if (is_flatmap(BIF_ARG_2)) { /* Will always become a tree */ - BIF_RET(map_merge_mixed(BIF_P, BIF_ARG_2, BIF_ARG_1, 1)); + return map_merge_mixed(BIF_P, BIF_ARG_2, BIF_ARG_1, 1); } BIF_P->fvalue = BIF_ARG_2; } else { @@ -1113,30 +1127,63 @@ static Eterm map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args) erts_free(ERTS_ALC_T_TMP, (void *) hxns); ERTS_VERIFY_UNUSED_TEMP_ALLOC(p); - return hashmap_merge(p, res, tree, swap_args); + return hashmap_merge(p, res, tree, swap_args, NULL); +} + +#define PSTACK_TYPE struct HashmapMergePStackType +struct HashmapMergePStackType { + Eterm *srcA, *srcB; + Uint32 abm, bbm, rbm; /* node bitmaps */ + int keepA; + int ix; + Eterm array[16]; +}; + +typedef struct HashmapMergeContext_ { + Uint size; /* total key-value counter */ + unsigned int lvl; + Eterm nodeA, nodeB; + Eterm res; + Eterm trap_bin; + ErtsPStack pstack; +#ifdef DEBUG + Eterm dbg_map_A, dbg_map_B; +#endif +} HashmapMergeContext; + +static void hashmap_merge_ctx_destructor(Binary* ctx_bin) +{ + HashmapMergeContext* ctx = (HashmapMergeContext*) ERTS_MAGIC_BIN_DATA(ctx_bin); + ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(ctx_bin) == hashmap_merge_ctx_destructor); + + PSTACK_DESTROY_SAVED(&ctx->pstack); +} + +BIF_RETTYPE maps_merge_trap_1(BIF_ALIST_1) { + Binary* ctx_bin = ((ProcBin *) binary_val(BIF_ARG_1))->val; + + ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(ctx_bin) == hashmap_merge_ctx_destructor); + + return hashmap_merge(BIF_P, NIL, NIL, 0, + (HashmapMergeContext*) ERTS_MAGIC_BIN_DATA(ctx_bin)); } #define HALLOC_EXTRA 200 +#define MAP_MERGE_LOOP_FACTOR 8 -static Eterm hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB, int swap_args) { +static BIF_RETTYPE hashmap_merge(Process *p, Eterm map_A, Eterm map_B, + int swap_args, HashmapMergeContext* ctx) { #define PSTACK_TYPE struct HashmapMergePStackType - struct HashmapMergePStackType { - Eterm *srcA, *srcB; - Uint32 abm, bbm, rbm; /* node bitmaps */ - int keepA; - int ix; - Eterm array[16]; - }; PSTACK_DECLARE(s, 4); - struct HashmapMergePStackType* sp = PSTACK_PUSH(s); - Eterm *hp, *nhp; - Eterm hdrA, hdrB; + HashmapMergeContext local_ctx; + struct HashmapMergePStackType* sp; Uint32 ahx, bhx; - Uint size; /* total key-value counter */ - int keepA = swap_args; - unsigned int lvl = 0; + Eterm hdrA, hdrB; + Eterm *hp, *nhp; + Eterm trap_ret; + Sint initial_reds = (Sint) (ERTS_BIF_REDS_LEFT(p) * MAP_MERGE_LOOP_FACTOR); + Sint reds = initial_reds; DeclareTmpHeap(th,2,p); - Eterm res = THE_NON_VALUE; UseTmpHeap(2,p); /* @@ -1144,52 +1191,72 @@ static Eterm hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB, int swap_args) * and merge each pair of nodes. */ - { - hashmap_head_t* a = (hashmap_head_t*) hashmap_val(nodeA); - hashmap_head_t* b = (hashmap_head_t*) hashmap_val(nodeB); - size = a->size + b->size; + PSTACK_CHANGE_ALLOCATOR(s, ERTS_ALC_T_SAVED_ESTACK); + + if (ctx == NULL) { /* first call */ + hashmap_head_t* a = (hashmap_head_t*) hashmap_val(map_A); + hashmap_head_t* b = (hashmap_head_t*) hashmap_val(map_B); + + sp = PSTACK_PUSH(s); + sp->keepA = swap_args; + local_ctx.size = a->size + b->size; + local_ctx.lvl = 0; + local_ctx.nodeA = map_A; + local_ctx.nodeB = map_B; + local_ctx.res = THE_NON_VALUE; + #ifdef DEBUG + local_ctx.dbg_map_A = map_A; + local_ctx.dbg_map_B = map_B; + local_ctx.trap_bin = THE_NON_VALUE; + #endif + ctx = &local_ctx; + } + else { + PSTACK_RESTORE(s, &ctx->pstack); + sp = PSTACK_TOP(s); + goto resume_from_trap; } recurse: - if (primary_tag(nodeA) == TAG_PRIMARY_BOXED && - primary_tag(nodeB) == TAG_PRIMARY_LIST) { + if (primary_tag(ctx->nodeA) == TAG_PRIMARY_BOXED && + primary_tag(ctx->nodeB) == TAG_PRIMARY_LIST) { /* Avoid implementing this combination by switching places */ - Eterm tmp = nodeA; - nodeA = nodeB; - nodeB = tmp; - keepA = !keepA; + Eterm tmp = ctx->nodeA; + ctx->nodeA = ctx->nodeB; + ctx->nodeB = tmp; + sp->keepA = !sp->keepA; } - switch (primary_tag(nodeA)) { + switch (primary_tag(ctx->nodeA)) { case TAG_PRIMARY_LIST: { - sp->srcA = list_val(nodeA); - switch (primary_tag(nodeB)) { + Eterm keyA = CAR(list_val(ctx->nodeA)); + switch (primary_tag(ctx->nodeB)) { case TAG_PRIMARY_LIST: { /* LEAF + LEAF */ - sp->srcB = list_val(nodeB); + Eterm keyB = CAR(list_val(ctx->nodeB)); - if (EQ(CAR(sp->srcA), CAR(sp->srcB))) { - --size; - res = keepA ? nodeA : nodeB; + if (EQ(keyA, keyB)) { + --ctx->size; + ctx->res = sp->keepA ? ctx->nodeA : ctx->nodeB; } else { - ahx = hashmap_restore_hash(th, lvl, CAR(sp->srcA)); - bhx = hashmap_restore_hash(th, lvl, CAR(sp->srcB)); + ahx = hashmap_restore_hash(th, ctx->lvl, keyA); + bhx = hashmap_restore_hash(th, ctx->lvl, keyB); sp->abm = 1 << hashmap_index(ahx); sp->bbm = 1 << hashmap_index(bhx); - sp->srcA = &nodeA; - sp->srcB = &nodeB; + sp->srcA = &ctx->nodeA; + sp->srcB = &ctx->nodeB; } break; } case TAG_PRIMARY_BOXED: { /* LEAF + NODE */ - sp->srcB = boxed_val(nodeB); + sp->srcB = boxed_val(ctx->nodeB); ASSERT(is_header(*sp->srcB)); hdrB = *sp->srcB++; - ahx = hashmap_restore_hash(th, lvl, CAR(sp->srcA)); + ahx = hashmap_restore_hash(th, ctx->lvl, keyA); sp->abm = 1 << hashmap_index(ahx); - sp->srcA = &nodeA; + sp->srcA = &ctx->nodeA; switch(hdrB & _HEADER_MAP_SUBTAG_MASK) { case HAMT_SUBTAG_HEAD_ARRAY: sp->srcB++; @@ -1202,26 +1269,26 @@ recurse: break; default: - erl_exit(1, "bad header tag %ld\r\n", *sp->srcB & _HEADER_MAP_SUBTAG_MASK); + erl_exit(ERTS_ABORT_EXIT, "bad header tag %ld\r\n", *sp->srcB & _HEADER_MAP_SUBTAG_MASK); break; } break; } default: - erl_exit(1, "bad primary tag %ld\r\n", nodeB); + erl_exit(ERTS_ABORT_EXIT, "bad primary tag %ld\r\n", ctx->nodeB); } break; } case TAG_PRIMARY_BOXED: { /* NODE + NODE */ - sp->srcA = boxed_val(nodeA); + sp->srcA = boxed_val(ctx->nodeA); hdrA = *sp->srcA++; ASSERT(is_header(hdrA)); switch (hdrA & _HEADER_MAP_SUBTAG_MASK) { case HAMT_SUBTAG_HEAD_ARRAY: { sp->srcA++; - ASSERT(primary_tag(nodeB) == TAG_PRIMARY_BOXED); + ASSERT(primary_tag(ctx->nodeB) == TAG_PRIMARY_BOXED); sp->abm = 0xffff; - sp->srcB = boxed_val(nodeB); + sp->srcB = boxed_val(ctx->nodeB); hdrB = *sp->srcB++; ASSERT(is_header(hdrB)); switch (hdrB & _HEADER_MAP_SUBTAG_MASK) { @@ -1240,9 +1307,9 @@ recurse: } case HAMT_SUBTAG_HEAD_BITMAP: sp->srcA++; case HAMT_SUBTAG_NODE_BITMAP: { - ASSERT(primary_tag(nodeB) == TAG_PRIMARY_BOXED); + ASSERT(primary_tag(ctx->nodeB) == TAG_PRIMARY_BOXED); sp->abm = MAP_HEADER_VAL(hdrA); - sp->srcB = boxed_val(nodeB); + sp->srcB = boxed_val(ctx->nodeB); hdrB = *sp->srcB++; ASSERT(is_header(hdrB)); switch (hdrB & _HEADER_MAP_SUBTAG_MASK) { @@ -1256,40 +1323,44 @@ recurse: break; default: - erl_exit(1, "bad header tag %ld\r\n", *sp->srcB & _HEADER_MAP_SUBTAG_MASK); + erl_exit(ERTS_ABORT_EXIT, "bad header tag %ld\r\n", *sp->srcB & _HEADER_MAP_SUBTAG_MASK); } break; } default: - erl_exit(1, "bad primary tag %ld\r\n", nodeA); + erl_exit(ERTS_ABORT_EXIT, "bad primary tag %ld\r\n", ctx->nodeA); } break; } default: - erl_exit(1, "bad primary tag %ld\r\n", nodeA); + erl_exit(ERTS_ABORT_EXIT, "bad primary tag %ld\r\n", ctx->nodeA); } for (;;) { - if (is_value(res)) { /* We have a complete (sub-)tree or leaf */ - if (lvl == 0) + if (is_value(ctx->res)) { /* We have a complete (sub-)tree or leaf */ + if (ctx->lvl == 0) break; /* Pop from stack and continue build parent node */ - lvl--; + ctx->lvl--; sp = PSTACK_POP(s); - sp->array[sp->ix++] = res; - res = THE_NON_VALUE; + sp->array[sp->ix++] = ctx->res; + ctx->res = THE_NON_VALUE; if (sp->rbm) { sp->srcA++; sp->srcB++; - keepA = sp->keepA; } } else { /* Start build a node */ sp->ix = 0; sp->rbm = sp->abm | sp->bbm; - ASSERT(!(sp->rbm == 0 && lvl > 0)); + ASSERT(!(sp->rbm == 0 && ctx->lvl > 0)); } + if (--reds <= 0) { + goto trap; + } +resume_from_trap: + while (sp->rbm) { Uint32 next = sp->rbm & (sp->rbm-1); Uint32 bit = sp->rbm ^ next; @@ -1297,13 +1368,12 @@ recurse: if (sp->abm & bit) { if (sp->bbm & bit) { /* Bit clash. Push and resolve by recursive merge */ - if (sp->rbm) { - sp->keepA = keepA; - } - nodeA = *sp->srcA; - nodeB = *sp->srcB; - lvl++; + int keepA = sp->keepA; + ctx->nodeA = *sp->srcA; + ctx->nodeB= *sp->srcB; + ctx->lvl++; sp = PSTACK_PUSH(s); + sp->keepA = keepA; goto recurse; } else { sp->array[sp->ix++] = *sp->srcA++; @@ -1315,23 +1385,62 @@ recurse: } ASSERT(sp->ix == hashmap_bitcount(sp->abm | sp->bbm)); - if (lvl == 0) { + if (ctx->lvl == 0) { nhp = HAllocX(p, HAMT_HEAD_BITMAP_SZ(sp->ix), HALLOC_EXTRA); hp = nhp; *hp++ = (sp->ix == 16 ? MAP_HEADER_HAMT_HEAD_ARRAY : MAP_HEADER_HAMT_HEAD_BITMAP(sp->abm | sp->bbm)); - *hp++ = size; + *hp++ = ctx->size; } else { nhp = HAllocX(p, HAMT_NODE_BITMAP_SZ(sp->ix), HALLOC_EXTRA); hp = nhp; *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(sp->abm | sp->bbm); } - memcpy(hp, sp->array, sp->ix * sizeof(Eterm)); - res = make_boxed(nhp); + sys_memcpy(hp, sp->array, sp->ix * sizeof(Eterm)); + ctx->res = make_boxed(nhp); + } + + /* Done */ + + if (ctx != &local_ctx) { + ASSERT(ctx->trap_bin != THE_NON_VALUE); + ASSERT(p->flags & F_DISABLE_GC); + erts_set_gc_state(p, 1); + } + else { + ASSERT(ctx->trap_bin == THE_NON_VALUE); + ASSERT(!(p->flags & F_DISABLE_GC)); } PSTACK_DESTROY(s); UnUseTmpHeap(2,p); - return res; + BUMP_REDS(p, (initial_reds - reds) / MAP_MERGE_LOOP_FACTOR); + return ctx->res; + +trap: /* Yield */ + + if (ctx == &local_ctx) { + Binary* ctx_b = erts_create_magic_binary(sizeof(HashmapMergeContext), + hashmap_merge_ctx_destructor); + ctx = ERTS_MAGIC_BIN_DATA(ctx_b); + sys_memcpy(ctx, &local_ctx, sizeof(HashmapMergeContext)); + hp = HAlloc(p, PROC_BIN_SIZE); + ASSERT(ctx->trap_bin == THE_NON_VALUE); + ctx->trap_bin = erts_mk_magic_binary_term(&hp, &MSO(p), ctx_b); + + erts_set_gc_state(p, 0); + } + else { + ASSERT(ctx->trap_bin != THE_NON_VALUE); + ASSERT(p->flags & F_DISABLE_GC); + } + + PSTACK_SAVE(s, &ctx->pstack); + + BUMP_ALL_REDS(p); + ERTS_BIF_PREP_TRAP1(trap_ret, &hashmap_merge_trap_export, + p, ctx->trap_bin); + UnUseTmpHeap(2,p); + return trap_ret; } static int hash_cmp(Uint32 ha, Uint32 hb) diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 07806c823c..14d42599a1 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -817,7 +817,7 @@ do {\ UWord _pbytes = PSTACK_COUNT(s) * sizeof(PSTACK_TYPE);\ (dst)->pstart = erts_alloc(s.alloc_type,\ sizeof(PSTK_DEF_STACK(s)));\ - memcpy((dst)->pstart, s.pstart, _pbytes);\ + sys_memcpy((dst)->pstart, s.pstart, _pbytes);\ (dst)->psp = (dst)->pstart + _pbytes - sizeof(PSTACK_TYPE);\ (dst)->pend = (dst)->pstart + sizeof(PSTK_DEF_STACK(s));\ (dst)->alloc_type = s.alloc_type;\ @@ -838,6 +838,14 @@ do { \ ASSERT(s.psp < s.pend); \ } while (0) +#define PSTACK_DESTROY_SAVED(pstack)\ +do {\ + if ((pstack)->pstart) {\ + erts_free((pstack)->alloc_type, (pstack)->pstart);\ + (pstack)->pstart = NULL;\ + }\ +} while(0) + /* binary.c */ @@ -1126,6 +1134,9 @@ Sint erts_binary_set_loop_limit(Sint limit); /* external.c */ void erts_init_external(void); +/* erl_map.c */ +void erts_init_map(void); + /* erl_unicode.c */ void erts_init_unicode(void); Sint erts_unicode_set_loop_limit(Sint limit); -- cgit v1.2.3 From f7ef9fb1679fcd46c48ec5f8a968f7e053b3d4ed Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 8 Jun 2015 15:50:46 +0200 Subject: erts: Optimize maps:merge to be better at reusing entire hashmap sub-trees. Sub-tree reuse is detected in three cases: 1. The sub-tree top node does not exist at all in the other map. Already implemented before this commit. 2. The exact same sub-tree exist in both maps. Must calculate nr of keys in tree to get total size right. 3. We detect that a sub-tree only contains stuff from one of the maps. There is still one case we don't detect. If A and B leafs have equal keys we could also compare the values. If values are equal, further node reuse could propagate up toward the root (by 'mix'==0). The downside would be potentially expensive value comparisons. --- erts/emulator/beam/erl_map.c | 300 ++++++++++++++++++++++--------------------- 1 file changed, 154 insertions(+), 146 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c index c1ad5658d9..3e78731d20 100644 --- a/erts/emulator/beam/erl_map.c +++ b/erts/emulator/beam/erl_map.c @@ -86,6 +86,7 @@ static BIF_RETTYPE hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB, int swap_ struct HashmapMergeContext_*); static Export hashmap_merge_trap_export; static BIF_RETTYPE maps_merge_trap_1(BIF_ALIST_1); +static Uint hashmap_subtree_size(Eterm node); static Eterm hashmap_to_list(Process *p, Eterm map); static Eterm hashmap_keys(Process *p, Eterm map); static Eterm hashmap_values(Process *p, Eterm map); @@ -1132,18 +1133,18 @@ static Eterm map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args) #define PSTACK_TYPE struct HashmapMergePStackType struct HashmapMergePStackType { + Eterm nodeA, nodeB; Eterm *srcA, *srcB; - Uint32 abm, bbm, rbm; /* node bitmaps */ - int keepA; + Uint32 abm, bbm, rbm; /* node bitmaps */ + int mix; /* &1: there are unique A stuff in node + * &2: there are unique B stuff in node */ int ix; - Eterm array[16]; + Eterm array[16]; /* temp node construction area */ }; typedef struct HashmapMergeContext_ { Uint size; /* total key-value counter */ unsigned int lvl; - Eterm nodeA, nodeB; - Eterm res; Eterm trap_bin; ErtsPStack pstack; #ifdef DEBUG @@ -1177,7 +1178,8 @@ static BIF_RETTYPE hashmap_merge(Process *p, Eterm map_A, Eterm map_B, PSTACK_DECLARE(s, 4); HashmapMergeContext local_ctx; struct HashmapMergePStackType* sp; - Uint32 ahx, bhx; + Uint32 hx; + Eterm res = THE_NON_VALUE; Eterm hdrA, hdrB; Eterm *hp, *nhp; Eterm trap_ret; @@ -1198,12 +1200,11 @@ static BIF_RETTYPE hashmap_merge(Process *p, Eterm map_A, Eterm map_B, hashmap_head_t* b = (hashmap_head_t*) hashmap_val(map_B); sp = PSTACK_PUSH(s); - sp->keepA = swap_args; + sp->srcA = swap_args ? &map_B : &map_A; + sp->srcB = swap_args ? &map_A : &map_B; + sp->mix = 0; local_ctx.size = a->size + b->size; local_ctx.lvl = 0; - local_ctx.nodeA = map_A; - local_ctx.nodeB = map_B; - local_ctx.res = THE_NON_VALUE; #ifdef DEBUG local_ctx.dbg_map_A = map_A; local_ctx.dbg_map_B = map_B; @@ -1219,133 +1220,97 @@ static BIF_RETTYPE hashmap_merge(Process *p, Eterm map_A, Eterm map_B, recurse: - if (primary_tag(ctx->nodeA) == TAG_PRIMARY_BOXED && - primary_tag(ctx->nodeB) == TAG_PRIMARY_LIST) { - /* Avoid implementing this combination by switching places */ - Eterm tmp = ctx->nodeA; - ctx->nodeA = ctx->nodeB; - ctx->nodeB = tmp; - sp->keepA = !sp->keepA; - } - - switch (primary_tag(ctx->nodeA)) { - case TAG_PRIMARY_LIST: { - Eterm keyA = CAR(list_val(ctx->nodeA)); - switch (primary_tag(ctx->nodeB)) { - case TAG_PRIMARY_LIST: { /* LEAF + LEAF */ - Eterm keyB = CAR(list_val(ctx->nodeB)); - - if (EQ(keyA, keyB)) { - --ctx->size; - ctx->res = sp->keepA ? ctx->nodeA : ctx->nodeB; - } else { - ahx = hashmap_restore_hash(th, ctx->lvl, keyA); - bhx = hashmap_restore_hash(th, ctx->lvl, keyB); - sp->abm = 1 << hashmap_index(ahx); - sp->bbm = 1 << hashmap_index(bhx); + sp->nodeA = *sp->srcA; + sp->nodeB = *sp->srcB; - sp->srcA = &ctx->nodeA; - sp->srcB = &ctx->nodeB; - } - break; - } - case TAG_PRIMARY_BOXED: { /* LEAF + NODE */ - sp->srcB = boxed_val(ctx->nodeB); - ASSERT(is_header(*sp->srcB)); - hdrB = *sp->srcB++; - - ahx = hashmap_restore_hash(th, ctx->lvl, keyA); - sp->abm = 1 << hashmap_index(ahx); - sp->srcA = &ctx->nodeA; - switch(hdrB & _HEADER_MAP_SUBTAG_MASK) { - case HAMT_SUBTAG_HEAD_ARRAY: - sp->srcB++; - sp->bbm = 0xffff; - break; + if (sp->nodeA == sp->nodeB) { + res = sp->nodeA; + ctx->size -= is_list(sp->nodeB) ? 1 : hashmap_subtree_size(sp->nodeB); + } + else { + if (is_list(sp->nodeA)) { /* A is LEAF */ + Eterm keyA = CAR(list_val(sp->nodeA)); + + if (is_list(sp->nodeB)) { /* LEAF + LEAF */ + Eterm keyB = CAR(list_val(sp->nodeB)); + + if (EQ(keyA, keyB)) { + --ctx->size; + res = sp->nodeB; + sp->mix = 2; /* We assume values differ. + + Don't spend time comparing big values. + - Might waste some heap space for internal + nodes that could otherwise be reused. */ + goto merge_nodes; + } + } + hx = hashmap_restore_hash(th, ctx->lvl, keyA); + sp->abm = 1 << hashmap_index(hx); + /* keep srcA pointing at the leaf */ + } + else { /* A is NODE */ + sp->srcA = boxed_val(sp->nodeA); + hdrA = *sp->srcA++; + ASSERT(is_header(hdrA)); + switch (hdrA & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: { + sp->srcA++; + sp->abm = 0xffff; + break; + } + case HAMT_SUBTAG_HEAD_BITMAP: sp->srcA++; + case HAMT_SUBTAG_NODE_BITMAP: { + sp->abm = MAP_HEADER_VAL(hdrA); + break; + } + default: + erl_exit(ERTS_ABORT_EXIT, "bad header %ld\r\n", hdrA); + } + } - case HAMT_SUBTAG_HEAD_BITMAP: sp->srcB++; - case HAMT_SUBTAG_NODE_BITMAP: - sp->bbm = MAP_HEADER_VAL(hdrB); - break; + if (is_list(sp->nodeB)) { /* B is LEAF */ + Eterm keyB = CAR(list_val(sp->nodeB)); - default: - erl_exit(ERTS_ABORT_EXIT, "bad header tag %ld\r\n", *sp->srcB & _HEADER_MAP_SUBTAG_MASK); - break; - } - break; - } - default: - erl_exit(ERTS_ABORT_EXIT, "bad primary tag %ld\r\n", ctx->nodeB); - } - break; - } - case TAG_PRIMARY_BOXED: { /* NODE + NODE */ - sp->srcA = boxed_val(ctx->nodeA); - hdrA = *sp->srcA++; - ASSERT(is_header(hdrA)); - switch (hdrA & _HEADER_MAP_SUBTAG_MASK) { - case HAMT_SUBTAG_HEAD_ARRAY: { - sp->srcA++; - ASSERT(primary_tag(ctx->nodeB) == TAG_PRIMARY_BOXED); - sp->abm = 0xffff; - sp->srcB = boxed_val(ctx->nodeB); - hdrB = *sp->srcB++; - ASSERT(is_header(hdrB)); - switch (hdrB & _HEADER_MAP_SUBTAG_MASK) { - case HAMT_SUBTAG_HEAD_ARRAY: - sp->srcB++; - sp->bbm = 0xffff; - break; - case HAMT_SUBTAG_HEAD_BITMAP: sp->srcB++; - case HAMT_SUBTAG_NODE_BITMAP: - sp->bbm = MAP_HEADER_VAL(hdrB); - break; - default: - erl_exit(1, "bad header tag %ld\r\n", *sp->srcB & _HEADER_MAP_SUBTAG_MASK); - } - break; - } - case HAMT_SUBTAG_HEAD_BITMAP: sp->srcA++; - case HAMT_SUBTAG_NODE_BITMAP: { - ASSERT(primary_tag(ctx->nodeB) == TAG_PRIMARY_BOXED); - sp->abm = MAP_HEADER_VAL(hdrA); - sp->srcB = boxed_val(ctx->nodeB); - hdrB = *sp->srcB++; - ASSERT(is_header(hdrB)); - switch (hdrB & _HEADER_MAP_SUBTAG_MASK) { - case HAMT_SUBTAG_HEAD_ARRAY: + hx = hashmap_restore_hash(th, ctx->lvl, keyB); + sp->bbm = 1 << hashmap_index(hx); + /* keep srcB pointing at the leaf */ + } + else { /* B is NODE */ + sp->srcB = boxed_val(sp->nodeB); + hdrB = *sp->srcB++; + ASSERT(is_header(hdrB)); + switch (hdrB & _HEADER_MAP_SUBTAG_MASK) { + case HAMT_SUBTAG_HEAD_ARRAY: { sp->srcB++; - sp->bbm = 0xffff; - break; - case HAMT_SUBTAG_HEAD_BITMAP: sp->srcB++; - case HAMT_SUBTAG_NODE_BITMAP: - sp->bbm = MAP_HEADER_VAL(hdrB); - break; - - default: - erl_exit(ERTS_ABORT_EXIT, "bad header tag %ld\r\n", *sp->srcB & _HEADER_MAP_SUBTAG_MASK); - } - break; - } - default: - erl_exit(ERTS_ABORT_EXIT, "bad primary tag %ld\r\n", ctx->nodeA); - } - break; - } - default: - erl_exit(ERTS_ABORT_EXIT, "bad primary tag %ld\r\n", ctx->nodeA); + sp->bbm = 0xffff; + break; + } + case HAMT_SUBTAG_HEAD_BITMAP: sp->srcB++; + case HAMT_SUBTAG_NODE_BITMAP: { + sp->bbm = MAP_HEADER_VAL(hdrB); + break; + } + default: + erl_exit(ERTS_ABORT_EXIT, "bad header %ld\r\n", hdrB); + } + } } +merge_nodes: + for (;;) { - if (is_value(ctx->res)) { /* We have a complete (sub-)tree or leaf */ + if (is_value(res)) { /* We have a complete (sub-)tree or leaf */ + int child_mix; if (ctx->lvl == 0) break; /* Pop from stack and continue build parent node */ ctx->lvl--; + child_mix = sp->mix; sp = PSTACK_POP(s); - sp->array[sp->ix++] = ctx->res; - ctx->res = THE_NON_VALUE; + sp->array[sp->ix++] = res; + sp->mix |= child_mix; + res = THE_NON_VALUE; if (sp->rbm) { sp->srcA++; sp->srcB++; @@ -1368,40 +1333,69 @@ resume_from_trap: if (sp->abm & bit) { if (sp->bbm & bit) { /* Bit clash. Push and resolve by recursive merge */ - int keepA = sp->keepA; - ctx->nodeA = *sp->srcA; - ctx->nodeB= *sp->srcB; + Eterm* srcA = sp->srcA; + Eterm* srcB = sp->srcB; ctx->lvl++; sp = PSTACK_PUSH(s); - sp->keepA = keepA; + sp->srcA = srcA; + sp->srcB = srcB; + sp->mix = 0; goto recurse; } else { sp->array[sp->ix++] = *sp->srcA++; + sp->mix |= 1; } } else { ASSERT(sp->bbm & bit); sp->array[sp->ix++] = *sp->srcB++; + sp->mix |= 2; } } - ASSERT(sp->ix == hashmap_bitcount(sp->abm | sp->bbm)); - if (ctx->lvl == 0) { - nhp = HAllocX(p, HAMT_HEAD_BITMAP_SZ(sp->ix), HALLOC_EXTRA); - hp = nhp; - *hp++ = (sp->ix == 16 ? MAP_HEADER_HAMT_HEAD_ARRAY - : MAP_HEADER_HAMT_HEAD_BITMAP(sp->abm | sp->bbm)); - *hp++ = ctx->size; - } else { - nhp = HAllocX(p, HAMT_NODE_BITMAP_SZ(sp->ix), HALLOC_EXTRA); - hp = nhp; - *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(sp->abm | sp->bbm); - } - sys_memcpy(hp, sp->array, sp->ix * sizeof(Eterm)); - ctx->res = make_boxed(nhp); + switch (sp->mix) { + case 0: /* Nodes A and B contain the *EXACT* same sub-trees + => fall through and reuse nodeA */ + + case 1: /* Only unique A stuff => reuse nodeA */ + res = sp->nodeA; + break; + + case 2: /* Only unique B stuff => reuse nodeB */ + res = sp->nodeB; + break; + + case 3: /* We have a mix => must build new node */ + ASSERT(sp->ix == hashmap_bitcount(sp->abm | sp->bbm)); + if (ctx->lvl == 0) { + nhp = HAllocX(p, HAMT_HEAD_BITMAP_SZ(sp->ix), HALLOC_EXTRA); + hp = nhp; + *hp++ = (sp->ix == 16 ? MAP_HEADER_HAMT_HEAD_ARRAY + : MAP_HEADER_HAMT_HEAD_BITMAP(sp->abm | sp->bbm)); + *hp++ = ctx->size; + } else { + nhp = HAllocX(p, HAMT_NODE_BITMAP_SZ(sp->ix), HALLOC_EXTRA); + hp = nhp; + *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(sp->abm | sp->bbm); + } + sys_memcpy(hp, sp->array, sp->ix * sizeof(Eterm)); + res = make_boxed(nhp); + break; + default: + erl_exit(ERTS_ABORT_EXIT, "strange mix %d\r\n", sp->mix); + } } /* Done */ +#ifdef DEBUG + { + Eterm *head = hashmap_val(res); + Uint size = head[1]; + Uint real_size = hashmap_subtree_size(res); + ASSERT(size == real_size); + } +#endif + if (ctx != &local_ctx) { ASSERT(ctx->trap_bin != THE_NON_VALUE); ASSERT(p->flags & F_DISABLE_GC); @@ -1414,7 +1408,7 @@ resume_from_trap: PSTACK_DESTROY(s); UnUseTmpHeap(2,p); BUMP_REDS(p, (initial_reds - reds) / MAP_MERGE_LOOP_FACTOR); - return ctx->res; + return res; trap: /* Yield */ @@ -1443,6 +1437,19 @@ trap: /* Yield */ return trap_ret; } +static Uint hashmap_subtree_size(Eterm node) { + DECLARE_WSTACK(stack); + Uint size = 0; + + hashmap_iterator_init(&stack, node, 0); + while (hashmap_iterator_next(&stack)) { + size++; + } + DESTROY_WSTACK(stack); + return size; +} + + static int hash_cmp(Uint32 ha, Uint32 hb) { int i; @@ -1865,10 +1872,11 @@ void hashmap_iterator_init(ErtsWStack* s, Eterm node, int reverse) { sz = 16; break; case HAMT_SUBTAG_HEAD_BITMAP: - sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + case HAMT_SUBTAG_NODE_BITMAP: + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); break; default: - erl_exit(1, "bad header"); + erl_exit(ERTS_ABORT_EXIT, "bad header"); } WSTACK_PUSH3((*s), (UWord)THE_NON_VALUE, /* end marker */ @@ -1905,7 +1913,7 @@ Eterm* hashmap_iterator_next(ErtsWStack* s) { ASSERT(sz < 17); break; default: - erl_exit(1, "bad header"); + erl_exit(ERTS_ABORT_EXIT, "bad header"); } idx++; -- cgit v1.2.3