diff options
-rw-r--r-- | erts/emulator/beam/atom.c | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_alloc.types | 1 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_lists.c | 830 | ||||
-rw-r--r-- | erts/emulator/beam/erl_utils.h | 61 | ||||
-rw-r--r-- | erts/emulator/beam/external.c | 3 | ||||
-rw-r--r-- | erts/emulator/beam/utils.c | 64 | ||||
-rw-r--r-- | lib/compiler/src/beam_jump.erl | 15 | ||||
-rw-r--r-- | lib/compiler/test/beam_jump_SUITE.erl | 19 | ||||
-rw-r--r-- | lib/stdlib/doc/src/lists.xml | 8 | ||||
-rw-r--r-- | lib/stdlib/test/lists_SUITE.erl | 42 | ||||
-rw-r--r-- | system/doc/efficiency_guide/commoncaveats.xml | 48 | ||||
-rw-r--r-- | system/doc/efficiency_guide/retired_myths.xml | 14 |
12 files changed, 879 insertions, 228 deletions
diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index 5381611fab..59b51fd15e 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -174,7 +174,7 @@ atom_alloc(Atom* tmpl) /* * Precompute ordinal value of first 3 bytes + 7 bits. - * This is used by utils.c:erts_cmp_atoms(). + * This is used by erl_utils.h:erts_cmp_atoms(). * We cannot use the full 32 bits of the first 4 bytes, * since we use the sign of the difference between two * ordinal values to represent their relative order. diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 5409b89bab..98ba0e5f07 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -274,6 +274,7 @@ type ML_YIELD_STATE SHORT_LIVED SYSTEM monitor_link_yield_state type ML_DIST STANDARD SYSTEM monitor_link_dist type PF3_ARGS SHORT_LIVED PROCESSES process_flag_3_arguments type SETUP_CONN_ARG SHORT_LIVED PROCESSES setup_connection_argument +type LIST_TRAP SHORT_LIVED PROCESSES list_bif_trap_state type ENVIRONMENT SYSTEM SYSTEM environment diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c index 395be67a90..a793b34852 100644 --- a/erts/emulator/beam/erl_bif_lists.c +++ b/erts/emulator/beam/erl_bif_lists.c @@ -29,12 +29,13 @@ #include "sys.h" #include "erl_vm.h" #include "global.h" -#include "erl_process.h" -#include "error.h" #include "bif.h" +#include "erl_binary.h" + static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List); + static BIF_RETTYPE append(Process* p, Eterm A, Eterm B) { Eterm list; @@ -146,103 +147,724 @@ BIF_RETTYPE append_2(BIF_ALIST_2) return append(BIF_P, BIF_ARG_1, BIF_ARG_2); } -/* - * erlang:'--'/2 - */ +/* erlang:'--'/2 + * + * Subtracts a list from another (LHS -- RHS), removing the first occurrence of + * each element in LHS from RHS. There is no type coercion so the elements must + * match exactly. + * + * The BIF is broken into several stages that can all trap individually, and it + * chooses its algorithm based on input size. If either input is small it will + * use a linear scan tuned to which side it's on, and if both inputs are large + * enough it will convert RHS into a multiset to provide good asymptotic + * behavior. */ + +#define SUBTRACT_LHS_THRESHOLD 16 +#define SUBTRACT_RHS_THRESHOLD 16 + +typedef enum { + SUBTRACT_STAGE_START, + SUBTRACT_STAGE_LEN_LHS, + + /* Naive linear scan that's efficient when + * LEN_LHS <= SUBTRACT_LHS_THRESHOLD. */ + SUBTRACT_STAGE_NAIVE_LHS, + + SUBTRACT_STAGE_LEN_RHS, + + /* As SUBTRACT_STAGE_NAIVE_LHS but for RHS. */ + SUBTRACT_STAGE_NAIVE_RHS, + + /* Creates a multiset from RHS for faster lookups before sweeping through + * LHS. The set is implemented as a red-black tree and duplicate elements + * are handled by a counter on each node. */ + SUBTRACT_STAGE_SET_BUILD, + SUBTRACT_STAGE_SET_FINISH +} ErtsSubtractCtxStage; + +typedef struct subtract_node__ { + struct subtract_node__ *parent; + struct subtract_node__ *left; + struct subtract_node__ *right; + int is_red; + + Eterm key; + Uint count; +} subtract_tree_t; + +typedef struct { + ErtsSubtractCtxStage stage; + + Eterm lhs_original; + Eterm rhs_original; + + Uint lhs_remaining; + Uint rhs_remaining; + + Eterm iterator; + + Eterm *result_cdr; + Eterm result; + + union { + Eterm lhs_elements[SUBTRACT_LHS_THRESHOLD]; + Eterm rhs_elements[SUBTRACT_RHS_THRESHOLD]; + + struct { + subtract_tree_t *tree; + + /* A memory area for the tree's nodes, saving us the need to have + * one allocation per node. */ + subtract_tree_t *alloc_start; + subtract_tree_t *alloc; + } rhs_set; + } u; +} ErtsSubtractContext; + +#define ERTS_RBT_PREFIX subtract +#define ERTS_RBT_T subtract_tree_t +#define ERTS_RBT_KEY_T Eterm +#define ERTS_RBT_FLAGS_T int +#define ERTS_RBT_INIT_EMPTY_TNODE(T) \ + do { \ + (T)->parent = NULL; \ + (T)->left = NULL; \ + (T)->right = NULL; \ + } while(0) +#define ERTS_RBT_IS_RED(T) ((T)->is_red) +#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1) +#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T)) +#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0) +#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red) +#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F) +#define ERTS_RBT_GET_PARENT(T) ((T)->parent) +#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P) +#define ERTS_RBT_GET_RIGHT(T) ((T)->right) +#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R)) +#define ERTS_RBT_GET_LEFT(T) ((T)->left) +#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L)) +#define ERTS_RBT_GET_KEY(T) ((T)->key) +#define ERTS_RBT_CMP_KEYS(KX, KY) CMP_TERM(KX, KY) +#define ERTS_RBT_WANT_LOOKUP_INSERT +#define ERTS_RBT_WANT_LOOKUP +#define ERTS_RBT_WANT_DELETE +#define ERTS_RBT_UNDEF + +#include "erl_rbtree.h" + +static int subtract_continue(Process *p, ErtsSubtractContext *context); + +static void subtract_ctx_dtor(ErtsSubtractContext *context) { + switch (context->stage) { + case SUBTRACT_STAGE_SET_BUILD: + case SUBTRACT_STAGE_SET_FINISH: + erts_free(ERTS_ALC_T_LIST_TRAP, context->u.rhs_set.alloc_start); + break; + default: + break; + } +} -#define SMALL_VEC_SIZE 10 -static Eterm subtract(Process* p, Eterm A, Eterm B) -{ - Eterm list; - Eterm* hp; - Uint need; - Eterm res; - Eterm small_vec[SMALL_VEC_SIZE]; /* Preallocated memory for small lists */ - Eterm* vec_p; - Eterm* vp; - Sint i; - Sint n; - Sint m; - - if ((n = erts_list_length(A)) < 0) { - BIF_ERROR(p, BADARG); +static int subtract_ctx_bin_dtor(Binary *context_bin) { + ErtsSubtractContext *context = ERTS_MAGIC_BIN_DATA(context_bin); + subtract_ctx_dtor(context); + return 1; +} + +static void subtract_ctx_move(ErtsSubtractContext *from, + ErtsSubtractContext *to) { + int uses_result_cdr = 0; + + to->stage = from->stage; + + to->lhs_original = from->lhs_original; + to->rhs_original = from->rhs_original; + + to->lhs_remaining = from->lhs_remaining; + to->rhs_remaining = from->rhs_remaining; + + to->iterator = from->iterator; + to->result = from->result; + + switch (to->stage) { + case SUBTRACT_STAGE_NAIVE_LHS: + sys_memcpy(to->u.lhs_elements, + from->u.lhs_elements, + sizeof(Eterm) * to->lhs_remaining); + break; + case SUBTRACT_STAGE_NAIVE_RHS: + sys_memcpy(to->u.rhs_elements, + from->u.rhs_elements, + sizeof(Eterm) * to->rhs_remaining); + + uses_result_cdr = 1; + break; + case SUBTRACT_STAGE_SET_FINISH: + uses_result_cdr = 1; + /* FALL THROUGH */ + case SUBTRACT_STAGE_SET_BUILD: + to->u.rhs_set.alloc_start = from->u.rhs_set.alloc_start; + to->u.rhs_set.alloc = from->u.rhs_set.alloc; + to->u.rhs_set.tree = from->u.rhs_set.tree; + break; + default: + break; } - if ((m = erts_list_length(B)) < 0) { - BIF_ERROR(p, BADARG); + + if (uses_result_cdr) { + if (from->result_cdr == &from->result) { + to->result_cdr = &to->result; + } else { + to->result_cdr = from->result_cdr; + } } - - if (n == 0) - BIF_RET(NIL); - if (m == 0) - BIF_RET(A); - - /* allocate element vector */ - if (n <= SMALL_VEC_SIZE) - vec_p = small_vec; - else - vec_p = (Eterm*) erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm)); - - /* PUT ALL ELEMENTS IN VP */ - vp = vec_p; - list = A; - i = n; - while(i--) { - Eterm* listp = list_val(list); - *vp++ = CAR(listp); - list = CDR(listp); +} + +static Eterm subtract_create_trap_state(Process *p, + ErtsSubtractContext *context) { + Binary *state_bin; + Eterm *hp; + + state_bin = erts_create_magic_binary(sizeof(ErtsSubtractContext), + subtract_ctx_bin_dtor); + + subtract_ctx_move(context, ERTS_MAGIC_BIN_DATA(state_bin)); + + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + + return erts_mk_magic_ref(&hp, &MSO(p), state_bin); +} + +static int subtract_enter_len_lhs(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_LEN_LHS; + + context->iterator = context->lhs_original; + context->lhs_remaining = 0; + + return subtract_continue(p, context); +} + +static int subtract_enter_len_rhs(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_LEN_RHS; + + context->iterator = context->rhs_original; + context->rhs_remaining = 0; + + return subtract_continue(p, context); +} + +static int subtract_get_length(Process *p, Eterm *iterator_p, Uint *count_p) { + static const Sint ELEMENTS_PER_RED = 32; + + Sint budget, count; + Eterm iterator; + + budget = ELEMENTS_PER_RED * ERTS_BIF_REDS_LEFT(p); + iterator = *iterator_p; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + for (count = 0; count < budget && is_list(iterator); count++) { + iterator = CDR(list_val(iterator)); } - - /* UNMARK ALL DELETED CELLS */ - list = B; - m = 0; /* number of deleted elements */ - while(is_list(list)) { - Eterm* listp = list_val(list); - Eterm elem = CAR(listp); - i = n; - vp = vec_p; - while(i--) { - if (is_value(*vp) && eq(*vp, elem)) { - *vp = THE_NON_VALUE; - m++; - break; - } - vp++; - } - list = CDR(listp); + + if (!is_list(iterator) && !is_nil(iterator)) { + return -1; } - - if (m == n) /* All deleted ? */ - res = NIL; - else if (m == 0) /* None deleted ? */ - res = A; - else { /* REBUILD LIST */ - res = NIL; - need = 2*(n - m); - hp = HAlloc(p, need); - vp = vec_p + n - 1; - while(vp >= vec_p) { - if (is_value(*vp)) { - res = CONS(hp, *vp, res); - hp += 2; - } - vp--; - } + + BUMP_REDS(p, count / ELEMENTS_PER_RED); + + *iterator_p = iterator; + *count_p += count; + + if (is_nil(iterator)) { + return 1; } - if (vec_p != small_vec) - erts_free(ERTS_ALC_T_TMP, (void *) vec_p); - BIF_RET(res); + + return 0; } -BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) -{ - return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2); +static int subtract_enter_naive_lhs(Process *p, ErtsSubtractContext *context) { + Eterm iterator; + int i = 0; + + context->stage = SUBTRACT_STAGE_NAIVE_LHS; + + context->iterator = context->rhs_original; + context->result = NIL; + + iterator = context->lhs_original; + + while (is_list(iterator)) { + const Eterm *cell = list_val(iterator); + + ASSERT(i < SUBTRACT_LHS_THRESHOLD); + + context->u.lhs_elements[i++] = CAR(cell); + iterator = CDR(cell); + } + + ASSERT(i == context->lhs_remaining); + + return subtract_continue(p, context); } -BIF_RETTYPE subtract_2(BIF_ALIST_2) -{ - return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2); +static int subtract_naive_lhs(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 16; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + + while (checks < budget && is_list(context->iterator)) { + const Eterm *cell; + Eterm value, next; + int found_at; + + cell = list_val(context->iterator); + + value = CAR(cell); + next = CDR(cell); + + for (found_at = 0; found_at < context->lhs_remaining; found_at++) { + if (EQ(value, context->u.lhs_elements[found_at])) { + /* We shift the array one step down as we have to preserve + * order. + * + * Note that we can't exit early as that would suppress errors + * in the right-hand side (this runs prior to determining the + * length of RHS). */ + + context->lhs_remaining--; + sys_memmove(&context->u.lhs_elements[found_at], + &context->u.lhs_elements[found_at + 1], + (context->lhs_remaining - found_at) * sizeof(Eterm)); + break; + } + } + + checks += MAX(1, context->lhs_remaining); + context->iterator = next; + } + + BUMP_REDS(p, MIN(checks, budget) / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + return 0; + } else if (!is_nil(context->iterator)) { + return -1; + } + + if (context->lhs_remaining > 0) { + Eterm *hp; + int i; + + hp = HAlloc(p, context->lhs_remaining * 2); + + for (i = context->lhs_remaining - 1; i >= 0; i--) { + Eterm value = context->u.lhs_elements[i]; + + context->result = CONS(hp, value, context->result); + hp += 2; + } + } + + ASSERT(context->lhs_remaining > 0 || context->result == NIL); + + return 1; +} + +static int subtract_enter_naive_rhs(Process *p, ErtsSubtractContext *context) { + Eterm iterator; + int i = 0; + + context->stage = SUBTRACT_STAGE_NAIVE_RHS; + + context->iterator = context->lhs_original; + context->result_cdr = &context->result; + context->result = NIL; + + iterator = context->rhs_original; + + while (is_list(iterator)) { + const Eterm *cell = list_val(iterator); + + ASSERT(i < SUBTRACT_RHS_THRESHOLD); + + context->u.rhs_elements[i++] = CAR(cell); + iterator = CDR(cell); + } + + ASSERT(i == context->rhs_remaining); + + return subtract_continue(p, context); +} + +static int subtract_naive_rhs(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 16; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (checks < budget && is_list(context->iterator)) { + const Eterm *cell; + Eterm value, next; + int found_at; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + for (found_at = context->rhs_remaining - 1; found_at >= 0; found_at--) { + if (EQ(value, context->u.rhs_elements[found_at])) { + break; + } + } + + if (found_at < 0) { + /* Destructively add the value to the result. This is safe + * since the GC is disabled and the unfinished term is never + * leaked to the outside world. */ + Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2); + + *context->result_cdr = make_list(hp); + context->result_cdr = &CDR(hp); + + CAR(hp) = value; + } else if (found_at >= 0) { + Eterm swap; + + if (context->rhs_remaining-- == 1) { + /* We've run out of items to remove, so the rest of the + * result will be equal to the remainder of the input. We know + * that LHS is well-formed as any errors would've been reported + * during length determination. */ + *context->result_cdr = next; + + BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED); + + return 1; + } + + swap = context->u.rhs_elements[context->rhs_remaining]; + context->u.rhs_elements[found_at] = swap; + } + + checks += context->rhs_remaining; + context->iterator = next; + context->lhs_remaining--; + } + + /* The result only has to be terminated when returning it to the user, but + * we're doing it when trapping as well to prevent headaches when + * debugging. */ + *context->result_cdr = NIL; + + BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0); + return 0; + } + + return 1; } +static int subtract_enter_set_build(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_SET_BUILD; + + context->u.rhs_set.alloc_start = + erts_alloc(ERTS_ALC_T_LIST_TRAP, + context->rhs_remaining * sizeof(subtract_tree_t)); + + context->u.rhs_set.alloc = context->u.rhs_set.alloc_start; + context->u.rhs_set.tree = NULL; + + context->iterator = context->rhs_original; + + return subtract_continue(p, context); +} + +static int subtract_set_build(Process *p, ErtsSubtractContext *context) { + const static Sint INSERTIONS_PER_RED = 16; + Sint budget, insertions; + + budget = INSERTIONS_PER_RED * ERTS_BIF_REDS_LEFT(p); + insertions = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (insertions < budget && is_list(context->iterator)) { + subtract_tree_t *existing_node, *new_node; + const Eterm *cell; + Eterm value, next; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + new_node = context->u.rhs_set.alloc; + new_node->key = value; + new_node->count = 1; + + existing_node = subtract_rbt_lookup_insert(&context->u.rhs_set.tree, + new_node); + + if (existing_node != NULL) { + existing_node->count++; + } else { + context->u.rhs_set.alloc++; + } + + context->iterator = next; + insertions++; + } + + BUMP_REDS(p, insertions / INSERTIONS_PER_RED); + + ASSERT(is_list(context->iterator) || is_nil(context->iterator)); + ASSERT(context->u.rhs_set.tree != NULL); + + return is_nil(context->iterator); +} + +static int subtract_enter_set_finish(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_SET_FINISH; + + context->result_cdr = &context->result; + context->result = NIL; + + context->iterator = context->lhs_original; + + return subtract_continue(p, context); +} + +static int subtract_set_finish(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 8; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (checks < budget && is_list(context->iterator)) { + subtract_tree_t *node; + const Eterm *cell; + Eterm value, next; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + ASSERT(context->rhs_remaining > 0); + + node = subtract_rbt_lookup(context->u.rhs_set.tree, value); + + if (node == NULL) { + Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2); + + *context->result_cdr = make_list(hp); + context->result_cdr = &CDR(hp); + + CAR(hp) = value; + } else { + if (context->rhs_remaining-- == 1) { + *context->result_cdr = next; + + BUMP_REDS(p, checks / CHECKS_PER_RED); + + return 1; + } + + if (node->count-- == 1) { + subtract_rbt_delete(&context->u.rhs_set.tree, node); + } + } + + context->iterator = next; + context->lhs_remaining--; + checks++; + } + + *context->result_cdr = NIL; + + BUMP_REDS(p, checks / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0); + return 0; + } + + return 1; +} + +static int subtract_continue(Process *p, ErtsSubtractContext *context) { + switch (context->stage) { + case SUBTRACT_STAGE_START: { + return subtract_enter_len_lhs(p, context); + } + + case SUBTRACT_STAGE_LEN_LHS: { + int res = subtract_get_length(p, + &context->iterator, + &context->lhs_remaining); + + if (res != 1) { + return res; + } + + if (context->lhs_remaining <= SUBTRACT_LHS_THRESHOLD) { + return subtract_enter_naive_lhs(p, context); + } + + return subtract_enter_len_rhs(p, context); + } + + case SUBTRACT_STAGE_NAIVE_LHS: { + return subtract_naive_lhs(p, context); + } + + case SUBTRACT_STAGE_LEN_RHS: { + int res = subtract_get_length(p, + &context->iterator, + &context->rhs_remaining); + + if (res != 1) { + return res; + } + + /* We've walked through both lists fully now so we no longer need + * to check for errors past this point. */ + + if (context->rhs_remaining <= SUBTRACT_RHS_THRESHOLD) { + return subtract_enter_naive_rhs(p, context); + } + + return subtract_enter_set_build(p, context); + } + + case SUBTRACT_STAGE_NAIVE_RHS: { + return subtract_naive_rhs(p, context); + } + + case SUBTRACT_STAGE_SET_BUILD: { + int res = subtract_set_build(p, context); + + if (res != 1) { + return res; + } + + return subtract_enter_set_finish(p, context); + } + + case SUBTRACT_STAGE_SET_FINISH: { + return subtract_set_finish(p, context); + } + + default: + ERTS_ASSERT(!"unreachable"); + } +} + +static int subtract_start(Process *p, Eterm lhs, Eterm rhs, + ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_START; + + context->lhs_original = lhs; + context->rhs_original = rhs; + + return subtract_continue(p, context); +} + +/* erlang:'--'/2 */ +static Eterm subtract(Export *bif_entry, BIF_ALIST_2) { + Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2; + + if ((is_list(lhs) || is_nil(lhs)) && (is_list(rhs) || is_nil(rhs))) { + /* We start with the context on the stack in the hopes that we won't + * have to trap. */ + ErtsSubtractContext context; + int res; + + res = subtract_start(BIF_P, lhs, rhs, &context); + + if (res == 0) { + Eterm state_mref; + + state_mref = subtract_create_trap_state(BIF_P, &context); + erts_set_gc_state(BIF_P, 0); + + BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL); + } + + subtract_ctx_dtor(&context); + + if (res < 0) { + BIF_ERROR(BIF_P, BADARG); + } + + BIF_RET(context.result); + } else if (is_internal_magic_ref(lhs)) { + ErtsSubtractContext *context; + int (*dtor)(Binary*); + Binary *magic_bin; + + int res; + + magic_bin = erts_magic_ref2bin(lhs); + dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin); + + if (dtor != subtract_ctx_bin_dtor) { + BIF_ERROR(BIF_P, BADARG); + } + + ASSERT(BIF_P->flags & F_DISABLE_GC); + ASSERT(rhs == NIL); + + context = ERTS_MAGIC_BIN_DATA(magic_bin); + res = subtract_continue(BIF_P, context); + + if (res == 0) { + BIF_TRAP2(bif_entry, BIF_P, lhs, NIL); + } + + erts_set_gc_state(BIF_P, 1); + + if (res < 0) { + ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry, + context->lhs_original, + context->rhs_original); + } + + BIF_RET(context->result); + } + + ASSERT(!(BIF_P->flags & F_DISABLE_GC)); + + BIF_ERROR(BIF_P, BADARG); +} + +BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) { + return subtract(bif_export[BIF_ebif_minusminus_2], BIF_CALL_ARGS); +} + +BIF_RETTYPE subtract_2(BIF_ALIST_2) { + return subtract(bif_export[BIF_subtract_2], BIF_CALL_ARGS); +} + + BIF_RETTYPE lists_member_2(BIF_ALIST_2) { Eterm term; @@ -283,7 +905,7 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p, { static const Uint CELLS_PER_RED = 40; - Eterm *heap_top, *heap_end; + Eterm *alloc_top, *alloc_end; Uint cells_left, max_cells; Eterm list, tail; Eterm lookahead; @@ -305,18 +927,18 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p, BIF_ERROR(c_p, BADARG); } - heap_top = HAlloc(c_p, 2 * (max_cells - cells_left)); - heap_end = heap_top + 2 * (max_cells - cells_left); + alloc_top = HAlloc(c_p, 2 * (max_cells - cells_left)); + alloc_end = alloc_top + 2 * (max_cells - cells_left); - while (heap_top < heap_end) { + while (alloc_top < alloc_end) { Eterm *pair = list_val(list); - tail = CONS(heap_top, CAR(pair), tail); + tail = CONS(alloc_top, CAR(pair), tail); list = CDR(pair); ASSERT(is_list(list) || is_nil(list)); - heap_top += 2; + alloc_top += 2; } if (is_nil(list)) { @@ -333,7 +955,7 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p, { static const Uint CELLS_PER_RED = 60; - Eterm *heap_top, *heap_end; + Eterm *alloc_start, *alloc_top, *alloc_end; Uint cells_left, max_cells; Eterm list, tail; @@ -343,21 +965,27 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p, cells_left = max_cells = CELLS_PER_RED * (1 + ERTS_BIF_REDS_LEFT(c_p)); ASSERT(HEAP_LIMIT(c_p) >= HEAP_TOP(c_p) + 2); - heap_end = HEAP_LIMIT(c_p) - 2; - heap_top = HEAP_TOP(c_p); + alloc_start = HEAP_TOP(c_p); + alloc_end = HEAP_LIMIT(c_p) - 2; + alloc_top = alloc_start; + + /* Don't process more cells than we have reductions for. */ + alloc_end = MIN(alloc_top + (cells_left * 2), alloc_end); - while (heap_top < heap_end && is_list(list)) { + while (alloc_top < alloc_end && is_list(list)) { Eterm *pair = list_val(list); - tail = CONS(heap_top, CAR(pair), tail); + tail = CONS(alloc_top, CAR(pair), tail); list = CDR(pair); - heap_top += 2; + alloc_top += 2; } - cells_left -= (heap_top - heap_end) / 2; + cells_left -= (alloc_top - alloc_start) / 2; + HEAP_TOP(c_p) = alloc_top; + + ASSERT(cells_left >= 0 && cells_left <= max_cells); BUMP_REDS(c_p, (max_cells - cells_left) / CELLS_PER_RED); - HEAP_TOP(c_p) = heap_top; if (is_nil(list)) { BIF_RET(tail); diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h index b3bfa69052..880febba8b 100644 --- a/erts/emulator/beam/erl_utils.h +++ b/erts/emulator/beam/erl_utils.h @@ -22,6 +22,7 @@ #define ERL_UTILS_H__ #include "sys.h" +#include "atom.h" #include "erl_printf.h" struct process; @@ -112,10 +113,12 @@ int eq(Eterm, Eterm); #define EQ(x,y) (((x) == (y)) || (is_not_both_immed((x),(y)) && eq((x),(y)))) -int erts_cmp_atoms(Eterm a, Eterm b); -Sint erts_cmp(Eterm, Eterm, int, int); -Sint erts_cmp_compound(Eterm, Eterm, int, int); +ERTS_GLB_INLINE Sint erts_cmp(Eterm, Eterm, int, int); +ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b); + Sint cmp(Eterm a, Eterm b); +Sint erts_cmp_compound(Eterm, Eterm, int, int); + #define CMP(A,B) erts_cmp(A,B,0,0) #define CMP_TERM(A,B) erts_cmp(A,B,1,0) #define CMP_EQ_ONLY(A,B) erts_cmp(A,B,0,1) @@ -150,4 +153,56 @@ Sint cmp(Eterm a, Eterm b); if (erts_cmp_compound(X,Y,0,EqOnly) Op 0) { Action; }; \ } +#define erts_float_comp(x,y) (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1)) + +#if ERTS_GLB_INLINE_INCL_FUNC_DEF + +ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) { + Atom *aa = atom_tab(atom_val(a)); + Atom *bb = atom_tab(atom_val(b)); + + byte *name_a, *name_b; + int len_a, len_b, diff; + + diff = aa->ord0 - bb->ord0; + + if (diff != 0) { + return diff; + } + + name_a = &aa->name[3]; + name_b = &bb->name[3]; + len_a = aa->len-3; + len_b = bb->len-3; + + if (len_a > 0 && len_b > 0) { + diff = sys_memcmp(name_a, name_b, MIN(len_a, len_b)); + + if (diff != 0) { + return diff; + } + } + + return len_a - len_b; +} + +ERTS_GLB_INLINE Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only) { + if (is_atom(a) && is_atom(b)) { + return erts_cmp_atoms(a, b); + } else if (is_both_small(a, b)) { + return (signed_val(a) - signed_val(b)); + } else if (is_float(a) && is_float(b)) { + FloatDef af, bf; + + GET_DOUBLE(a, af); + GET_DOUBLE(b, bf); + + return erts_float_comp(af.fd, bf.fd); + } + + return erts_cmp_compound(a,b,exact,eq_only); +} + +#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */ + #endif diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 621ba108ba..9a66e491f3 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -1953,7 +1953,8 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla #define RETURN_STATE() \ do { \ - hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE+3); \ + static const int TUPLE2_SIZE = 2 + 1; \ + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE + TUPLE2_SIZE); \ c_term = erts_mk_magic_ref(&hp, &MSO(p), context_b); \ res = TUPLE2(hp, Term, c_term); \ BUMP_ALL_REDS(p); \ diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index 08f8ca9788..d81bd89a48 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -2615,27 +2615,6 @@ not_equal: } -/* - * Lexically compare two strings of bytes (string s1 length l1 and s2 l2). - * - * s1 < s2 return -1 - * s1 = s2 return 0 - * s1 > s2 return +1 - */ -static int cmpbytes(byte *s1, int l1, byte *s2, int l2) -{ - int i; - i = 0; - while((i < l1) && (i < l2)) { - if (s1[i] < s2[i]) return(-1); - if (s1[i] > s2[i]) return(1); - i++; - } - if (l1 < l2) return(-1); - if (l1 > l2) return(1); - return(0); -} - /* * Compare objects. @@ -2649,20 +2628,6 @@ static int cmpbytes(byte *s1, int l1, byte *s2, int l2) * */ - -#define float_comp(x,y) (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1)) - -int erts_cmp_atoms(Eterm a, Eterm b) -{ - Atom *aa = atom_tab(atom_val(a)); - Atom *bb = atom_tab(atom_val(b)); - int diff = aa->ord0 - bb->ord0; - if (diff) - return diff; - return cmpbytes(aa->name+3, aa->len-3, - bb->name+3, bb->len-3); -} - /* cmp(Eterm a, Eterm b) * For compatibility with HiPE - arith-based compare. */ @@ -2673,22 +2638,6 @@ Sint cmp(Eterm a, Eterm b) Sint erts_cmp_compound(Eterm a, Eterm b, int exact, int eq_only); -Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only) -{ - if (is_atom(a) && is_atom(b)) { - return erts_cmp_atoms(a, b); - } else if (is_both_small(a, b)) { - return (signed_val(a) - signed_val(b)); - } else if (is_float(a) && is_float(b)) { - FloatDef af, bf; - GET_DOUBLE(a, af); - GET_DOUBLE(b, bf); - return float_comp(af.fd, bf.fd); - } - return erts_cmp_compound(a,b,exact,eq_only); -} - - /* erts_cmp(Eterm a, Eterm b, int exact) * exact = 1 -> term-based compare * exact = 0 -> arith-based compare @@ -2985,7 +2934,7 @@ tailrecur_ne: GET_DOUBLE(a, af); GET_DOUBLE(b, bf); - ON_CMP_GOTO(float_comp(af.fd, bf.fd)); + ON_CMP_GOTO(erts_float_comp(af.fd, bf.fd)); } case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE): case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE): @@ -3022,10 +2971,7 @@ tailrecur_ne: ErlFunThing* f2 = (ErlFunThing *) fun_val(b); Sint diff; - diff = cmpbytes(atom_tab(atom_val(f1->fe->module))->name, - atom_tab(atom_val(f1->fe->module))->len, - atom_tab(atom_val(f2->fe->module))->name, - atom_tab(atom_val(f2->fe->module))->len); + diff = erts_cmp_atoms((f1->fe)->module, (f2->fe)->module); if (diff != 0) { RETURN_NEQ(diff); } @@ -3219,7 +3165,7 @@ tailrecur_ne: if (f2.fd < MAX_LOSSLESS_FLOAT && f2.fd > MIN_LOSSLESS_FLOAT) { /* Float is within the no loss limit */ f1.fd = signed_val(aw); - j = float_comp(f1.fd, f2.fd); + j = erts_float_comp(f1.fd, f2.fd); } #if ERTS_SIZEOF_ETERM == 8 else if (f2.fd > (double) (MAX_SMALL + 1)) { @@ -3266,7 +3212,7 @@ tailrecur_ne: if (big_to_double(aw, &f1.fd) < 0) { j = big_sign(aw) ? -1 : 1; } else { - j = float_comp(f1.fd, f2.fd); + j = erts_float_comp(f1.fd, f2.fd); } } else { big = double_to_big(f2.fd, big_buf, sizeof(big_buf)/sizeof(Eterm)); @@ -3282,7 +3228,7 @@ tailrecur_ne: if (f1.fd < MAX_LOSSLESS_FLOAT && f1.fd > MIN_LOSSLESS_FLOAT) { /* Float is within the no loss limit */ f2.fd = signed_val(bw); - j = float_comp(f1.fd, f2.fd); + j = erts_float_comp(f1.fd, f2.fd); } #if ERTS_SIZEOF_ETERM == 8 else if (f1.fd > (double) (MAX_SMALL + 1)) { diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl index 9eee56d604..22974da398 100644 --- a/lib/compiler/src/beam_jump.erl +++ b/lib/compiler/src/beam_jump.erl @@ -128,7 +128,7 @@ %%% on the program state. %%% --import(lists, [reverse/1,reverse/2,foldl/3]). +-import(lists, [dropwhile/2,reverse/1,reverse/2,foldl/3]). -type instruction() :: beam_utils:instruction(). @@ -411,14 +411,19 @@ opt_useless_loads([{test,_,{f,L},_}=I|Is], L, St) -> opt_useless_loads(Is, _L, St) -> {Is,St}. -opt_useless_block_loads([{set,[Dst],_,_}=I|Is], L, Index) -> - BlockJump = [{block,Is},{jump,{f,L}}], +opt_useless_block_loads([{set,[Dst],_,_}=I|Is0], L, Index) -> + BlockJump = [{block,Is0},{jump,{f,L}}], case beam_utils:is_killed(Dst, BlockJump, Index) of true -> - %% The register is killed and not used, we can remove the load + %% The register is killed and not used, we can remove the load. + %% Remove any `put` instructions in case we just + %% removed a `put_tuple` instruction. + Is = dropwhile(fun({set,_,_,put}) -> true; + (_) -> false + end, Is0), opt_useless_block_loads(Is, L, Index); false -> - [I|opt_useless_block_loads(Is, L, Index)] + [I|opt_useless_block_loads(Is0, L, Index)] end; opt_useless_block_loads([I|Is], L, Index) -> [I|opt_useless_block_loads(Is, L, Index)]; diff --git a/lib/compiler/test/beam_jump_SUITE.erl b/lib/compiler/test/beam_jump_SUITE.erl index c61e4ab65c..faedc0c1f1 100644 --- a/lib/compiler/test/beam_jump_SUITE.erl +++ b/lib/compiler/test/beam_jump_SUITE.erl @@ -21,7 +21,8 @@ -export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1, init_per_group/2,end_per_group/2, - undefined_label/1,ambiguous_catch_try_state/1]). + undefined_label/1,ambiguous_catch_try_state/1, + build_tuple/1]). suite() -> [{ct_hooks,[ts_install_cth]}]. @@ -32,7 +33,8 @@ all() -> groups() -> [{p,[parallel], [undefined_label, - ambiguous_catch_try_state + ambiguous_catch_try_state, + build_tuple ]}]. init_per_suite(Config) -> @@ -72,3 +74,16 @@ river() -> song. checks(Wanted) -> %% Must be one line to cause the unsafe optimization. {catch case river() of sheet -> begin +Wanted, if "da" -> Wanted end end end, catch case river() of sheet -> begin + Wanted, if "da" -> Wanted end end end}. + +-record(message2, {id, p1}). +-record(message3, {id, p1, p2}). + +build_tuple(_Config) -> + {'EXIT',{{badrecord,message3},_}} = (catch do_build_tuple(#message2{})), + ok. + +do_build_tuple(Message) -> + if is_record(Message, message2) -> + Res = {res, rand:uniform(100)}, + {Message#message3.id, Res} + end. diff --git a/lib/stdlib/doc/src/lists.xml b/lib/stdlib/doc/src/lists.xml index c3d5d7e07a..e4215a5336 100644 --- a/lib/stdlib/doc/src/lists.xml +++ b/lib/stdlib/doc/src/lists.xml @@ -850,14 +850,6 @@ splitwith(Pred, List) -> > <input>lists:subtract("123212", "212").</input> "312".</pre> <p><c>lists:subtract(A, B)</c> is equivalent to <c>A -- B</c>.</p> - <warning> - <p>The complexity of <c>lists:subtract(A, B)</c> is proportional to - <c>length(A)*length(B)</c>, meaning that it is very slow if both - <c>A</c> and <c>B</c> are long lists. (If both lists are long, it - is a much better choice to use ordered lists and - <seealso marker="ordsets#subtract/2"> - <c>ordsets:subtract/2</c></seealso>.</p> - </warning> </desc> </func> diff --git a/lib/stdlib/test/lists_SUITE.erl b/lib/stdlib/test/lists_SUITE.erl index 837ab4e97e..9a94bcc012 100644 --- a/lib/stdlib/test/lists_SUITE.erl +++ b/lib/stdlib/test/lists_SUITE.erl @@ -2597,6 +2597,13 @@ subtract(Config) when is_list(Config) -> {'EXIT',_} = (catch sub([a|b], [])), {'EXIT',_} = (catch sub([a|b], [a])), + %% Trapping, both crashing and otherwise. + [sub_trapping(N) || N <- lists:seq(0, 18)], + + %% The current implementation chooses which algorithm to use based on + %% certain thresholds, and we need proper coverage for all corner cases. + [sub_thresholds(N) || N <- lists:seq(0, 32)], + ok. sub_non_matching(A, B) -> @@ -2606,6 +2613,41 @@ sub(A, B) -> Res = A -- B, Res = lists:subtract(A, B). +sub_trapping(N) -> + List = lists:duplicate(N + (1 bsl N), gurka), + ImproperList = List ++ crash, + + {'EXIT',_} = (catch sub_trapping_1(ImproperList, [])), + {'EXIT',_} = (catch sub_trapping_1(List, ImproperList)), + + List = List -- lists:duplicate(N + (1 bsl N), gaffel), + ok = sub_trapping_1(List, []). + +sub_trapping_1([], _) -> ok; +sub_trapping_1(L, R) -> sub_trapping_1(L -- R, [gurka | R]). + +sub_thresholds(N) -> + %% This needs to be long enough to cause trapping. + OtherLen = 1 bsl 18, + Other = lists:seq(0, OtherLen - 1), + + Disjoint = lists:seq(-N, -1), + Subset = lists:seq(1, N), + + %% LHS is disjoint from RHS, so all elements must be retained. + Disjoint = Disjoint -- Other, + + %% LHS is covered by RHS, so all elements must be removed. + [] = Subset -- Other, + + %% RHS is disjoint from LHS, so all elements must be retained. + Other = Other -- Disjoint, + + %% RHS is covered by LHS, so N elements must be removed. + N = OtherLen - length(Other -- Subset), + + ok. + %% Test lists:droplast/1 droplast(Config) when is_list(Config) -> [] = lists:droplast([x]), diff --git a/system/doc/efficiency_guide/commoncaveats.xml b/system/doc/efficiency_guide/commoncaveats.xml index b41ffc3902..367da09ba3 100644 --- a/system/doc/efficiency_guide/commoncaveats.xml +++ b/system/doc/efficiency_guide/commoncaveats.xml @@ -169,53 +169,5 @@ multiple_setelement(T0) -> {Bin1,Bin2} = split_binary(Bin, Num)</code> </section> - <section> - <title>Operator "--"</title> - <p>The "<c>--</c>" operator has a complexity - proportional to the product of the length of its operands. - This means that the operator is very slow if both of its operands - are long lists:</p> - - <p><em>DO NOT</em></p> - <code type="none"><![CDATA[ - HugeList1 -- HugeList2]]></code> - - <p>Instead use the <seealso marker="stdlib:ordsets">ordsets</seealso> - module in STDLIB:</p> - - <p><em>DO</em></p> - <code type="none"> - HugeSet1 = ordsets:from_list(HugeList1), - HugeSet2 = ordsets:from_list(HugeList2), - ordsets:subtract(HugeSet1, HugeSet2)</code> - - <p>Obviously, that code does not work if the original order - of the list is important. If the order of the list must be - preserved, do as follows:</p> - - <p><em>DO</em></p> - <code type="none"><![CDATA[ - Set = gb_sets:from_list(HugeList2), - [E || E <- HugeList1, not gb_sets:is_element(E, Set)]]]></code> - - <note><p>This code behaves differently from "<c>--</c>" - if the lists contain duplicate elements (one occurrence - of an element in HugeList2 removes <em>all</em> - occurrences in HugeList1.)</p> - <p>Also, this code compares lists elements using the - "<c>==</c>" operator, while "<c>--</c>" uses the "<c>=:=</c>" operator. - If that difference is important, <c>sets</c> can be used instead of - <c>gb_sets</c>, but <c>sets:from_list/1</c> is much - slower than <c>gb_sets:from_list/1</c> for long lists.</p></note> - - <p>Using the "<c>--</c>" operator to delete an element - from a list is not a performance problem:</p> - - <p><em>OK</em></p> - <code type="none"> - HugeList1 -- [Element]</code> - - </section> - </chapter> diff --git a/system/doc/efficiency_guide/retired_myths.xml b/system/doc/efficiency_guide/retired_myths.xml index 9b914a3b6e..144c942c2b 100644 --- a/system/doc/efficiency_guide/retired_myths.xml +++ b/system/doc/efficiency_guide/retired_myths.xml @@ -60,4 +60,18 @@ That leads us to the myth that tail-recursive functions are faster than body-recursive functions.</p> </section> + + <section> + <title>Myth: List subtraction ("--" operator) is slow</title> + + <p>List subtraction used to have a run-time complexity proportional to the + product of the length of its operands, so it was extremely slow when both + lists were long.</p> + + <p>As of OTP 22 the run-time complexity is "n log n" and the operation will + complete quickly even when both lists are very long. In fact, it is + faster and uses less memory than the commonly used workaround to convert + both lists to ordered sets before subtracting them with + <c>ordsets:subtract/2</c>.</p> + </section> </chapter> |