diff options
author | Erlang/OTP <[email protected]> | 2018-11-05 10:48:25 +0100 |
---|---|---|
committer | Erlang/OTP <[email protected]> | 2018-11-05 10:48:25 +0100 |
commit | 6605e23f1c622cd6e30a523891f00f3e6ea746f8 (patch) | |
tree | 36119b24784e1b54770a4ca61680f9e9f219c5b6 | |
parent | 1da4135c98d9160d2d890724eb423db1fd1e39a2 (diff) | |
parent | bf5886b790f8f386ed425f543506a4bebb48448c (diff) | |
download | otp-6605e23f1c622cd6e30a523891f00f3e6ea746f8.tar.gz otp-6605e23f1c622cd6e30a523891f00f3e6ea746f8.tar.bz2 otp-6605e23f1c622cd6e30a523891f00f3e6ea746f8.zip |
Merge branch 'john/erts/OTP-20.3.8/minusminus_trapping/OTP-15371' into maint-20
* john/erts/OTP-20.3.8/minusminus_trapping/OTP-15371:
Optimize operator '--' and yield on large inputs
-rw-r--r-- | erts/emulator/beam/erl_alloc.types | 1 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_lists.c | 797 | ||||
-rw-r--r-- | lib/stdlib/doc/src/lists.xml | 8 | ||||
-rw-r--r-- | lib/stdlib/test/lists_SUITE.erl | 42 | ||||
-rw-r--r-- | system/doc/efficiency_guide/commoncaveats.xml | 48 | ||||
-rw-r--r-- | system/doc/efficiency_guide/retired_myths.xml | 14 |
6 files changed, 767 insertions, 143 deletions
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 252bf1cc7e..af1133b853 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -322,6 +322,7 @@ type THR_PRGR_DATA LONG_LIVED SYSTEM thr_prgr_data type T_THR_PRGR_DATA SHORT_LIVED SYSTEM temp_thr_prgr_data type RELEASE_LAREA SHORT_LIVED SYSTEM release_literal_area +endif +type LIST_TRAP SHORT_LIVED PROCESSES list_bif_trap_state # # Types used for special emulators diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c index 73d327da3e..94a41c285a 100644 --- a/erts/emulator/beam/erl_bif_lists.c +++ b/erts/emulator/beam/erl_bif_lists.c @@ -29,12 +29,13 @@ #include "sys.h" #include "erl_vm.h" #include "global.h" -#include "erl_process.h" -#include "error.h" #include "bif.h" +#include "erl_binary.h" + static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List); + static BIF_RETTYPE append(Process* p, Eterm A, Eterm B) { Eterm list; @@ -146,103 +147,725 @@ BIF_RETTYPE append_2(BIF_ALIST_2) return append(BIF_P, BIF_ARG_1, BIF_ARG_2); } -/* - * erlang:'--'/2 - */ +/* erlang:'--'/2 + * + * Subtracts a list from another (LHS -- RHS), removing the first occurrence of + * each element in LHS from RHS. There is no type coercion so the elements must + * match exactly. + * + * The BIF is broken into several stages that can all trap individually, and it + * chooses its algorithm based on input size. If either input is small it will + * use a linear scan tuned to which side it's on, and if both inputs are large + * enough it will convert RHS into a multiset to provide good asymptotic + * behavior. */ -#define SMALL_VEC_SIZE 10 -static Eterm subtract(Process* p, Eterm A, Eterm B) -{ - Eterm list; - Eterm* hp; - Uint need; - Eterm res; - Eterm small_vec[SMALL_VEC_SIZE]; /* Preallocated memory for small lists */ - Eterm* vec_p; - Eterm* vp; - Sint i; - Sint n; - Sint m; - - if ((n = erts_list_length(A)) < 0) { - BIF_ERROR(p, BADARG); +#define SUBTRACT_LHS_THRESHOLD 16 +#define SUBTRACT_RHS_THRESHOLD 16 + +typedef enum { + SUBTRACT_STAGE_START, + SUBTRACT_STAGE_LEN_LHS, + + /* Naive linear scan that's efficient when + * LEN_LHS <= SUBTRACT_LHS_THRESHOLD. */ + SUBTRACT_STAGE_NAIVE_LHS, + + SUBTRACT_STAGE_LEN_RHS, + + /* As SUBTRACT_STAGE_NAIVE_LHS but for RHS. */ + SUBTRACT_STAGE_NAIVE_RHS, + + /* Creates a multiset from RHS for faster lookups before sweeping through + * LHS. The set is implemented as a red-black tree and duplicate elements + * are handled by a counter on each node. */ + SUBTRACT_STAGE_SET_BUILD, + SUBTRACT_STAGE_SET_FINISH +} ErtsSubtractCtxStage; + +typedef struct subtract_node__ { + struct subtract_node__ *parent; + struct subtract_node__ *left; + struct subtract_node__ *right; + int is_red; + + Eterm key; + Uint count; +} subtract_tree_t; + +typedef struct { + ErtsSubtractCtxStage stage; + + Eterm lhs_original; + Eterm rhs_original; + + Uint lhs_remaining; + Uint rhs_remaining; + + Eterm iterator; + + Eterm *result_cdr; + Eterm result; + + union { + Eterm lhs_elements[SUBTRACT_LHS_THRESHOLD]; + Eterm rhs_elements[SUBTRACT_RHS_THRESHOLD]; + + struct { + subtract_tree_t *tree; + + /* A memory area for the tree's nodes, saving us the need to have + * one allocation per node. */ + subtract_tree_t *alloc_start; + subtract_tree_t *alloc; + } rhs_set; + } u; +} ErtsSubtractContext; + +#define ERTS_RBT_PREFIX subtract +#define ERTS_RBT_T subtract_tree_t +#define ERTS_RBT_KEY_T Eterm +#define ERTS_RBT_FLAGS_T int +#define ERTS_RBT_INIT_EMPTY_TNODE(T) \ + do { \ + (T)->parent = NULL; \ + (T)->left = NULL; \ + (T)->right = NULL; \ + } while(0) +#define ERTS_RBT_IS_RED(T) ((T)->is_red) +#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1) +#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T)) +#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0) +#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red) +#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F) +#define ERTS_RBT_GET_PARENT(T) ((T)->parent) +#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P) +#define ERTS_RBT_GET_RIGHT(T) ((T)->right) +#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R)) +#define ERTS_RBT_GET_LEFT(T) ((T)->left) +#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L)) +#define ERTS_RBT_GET_KEY(T) ((T)->key) +#define ERTS_RBT_IS_LT(KX, KY) (CMP_TERM(KX, KY) < 0) +#define ERTS_RBT_IS_EQ(KX, KY) EQ(KX, KY) +#define ERTS_RBT_WANT_LOOKUP_INSERT +#define ERTS_RBT_WANT_LOOKUP +#define ERTS_RBT_WANT_DELETE +#define ERTS_RBT_UNDEF + +#include "erl_rbtree.h" + +static int subtract_continue(Process *p, ErtsSubtractContext *context); + +static void subtract_ctx_dtor(ErtsSubtractContext *context) { + switch (context->stage) { + case SUBTRACT_STAGE_SET_BUILD: + case SUBTRACT_STAGE_SET_FINISH: + erts_free(ERTS_ALC_T_LIST_TRAP, context->u.rhs_set.alloc_start); + break; + default: + break; } - if ((m = erts_list_length(B)) < 0) { - BIF_ERROR(p, BADARG); +} + +static int subtract_ctx_bin_dtor(Binary *context_bin) { + ErtsSubtractContext *context = ERTS_MAGIC_BIN_DATA(context_bin); + subtract_ctx_dtor(context); + return 1; +} + +static void subtract_ctx_move(ErtsSubtractContext *from, + ErtsSubtractContext *to) { + int uses_result_cdr = 0; + + to->stage = from->stage; + + to->lhs_original = from->lhs_original; + to->rhs_original = from->rhs_original; + + to->lhs_remaining = from->lhs_remaining; + to->rhs_remaining = from->rhs_remaining; + + to->iterator = from->iterator; + to->result = from->result; + + switch (to->stage) { + case SUBTRACT_STAGE_NAIVE_LHS: + sys_memcpy(to->u.lhs_elements, + from->u.lhs_elements, + sizeof(Eterm) * to->lhs_remaining); + break; + case SUBTRACT_STAGE_NAIVE_RHS: + sys_memcpy(to->u.rhs_elements, + from->u.rhs_elements, + sizeof(Eterm) * to->rhs_remaining); + + uses_result_cdr = 1; + break; + case SUBTRACT_STAGE_SET_FINISH: + uses_result_cdr = 1; + /* FALL THROUGH */ + case SUBTRACT_STAGE_SET_BUILD: + to->u.rhs_set.alloc_start = from->u.rhs_set.alloc_start; + to->u.rhs_set.alloc = from->u.rhs_set.alloc; + to->u.rhs_set.tree = from->u.rhs_set.tree; + break; + default: + break; } - - if (n == 0) - BIF_RET(NIL); - if (m == 0) - BIF_RET(A); - - /* allocate element vector */ - if (n <= SMALL_VEC_SIZE) - vec_p = small_vec; - else - vec_p = (Eterm*) erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm)); - - /* PUT ALL ELEMENTS IN VP */ - vp = vec_p; - list = A; - i = n; - while(i--) { - Eterm* listp = list_val(list); - *vp++ = CAR(listp); - list = CDR(listp); + + if (uses_result_cdr) { + if (from->result_cdr == &from->result) { + to->result_cdr = &to->result; + } else { + to->result_cdr = from->result_cdr; + } } - - /* UNMARK ALL DELETED CELLS */ - list = B; - m = 0; /* number of deleted elements */ - while(is_list(list)) { - Eterm* listp = list_val(list); - Eterm elem = CAR(listp); - i = n; - vp = vec_p; - while(i--) { - if (is_value(*vp) && eq(*vp, elem)) { - *vp = THE_NON_VALUE; - m++; - break; - } - vp++; - } - list = CDR(listp); +} + +static Eterm subtract_create_trap_state(Process *p, + ErtsSubtractContext *context) { + Binary *state_bin; + Eterm *hp; + + state_bin = erts_create_magic_binary(sizeof(ErtsSubtractContext), + subtract_ctx_bin_dtor); + + subtract_ctx_move(context, ERTS_MAGIC_BIN_DATA(state_bin)); + + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + + return erts_mk_magic_ref(&hp, &MSO(p), state_bin); +} + +static int subtract_enter_len_lhs(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_LEN_LHS; + + context->iterator = context->lhs_original; + context->lhs_remaining = 0; + + return subtract_continue(p, context); +} + +static int subtract_enter_len_rhs(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_LEN_RHS; + + context->iterator = context->rhs_original; + context->rhs_remaining = 0; + + return subtract_continue(p, context); +} + +static int subtract_get_length(Process *p, Eterm *iterator_p, Uint *count_p) { + static const Sint ELEMENTS_PER_RED = 32; + + Sint budget, count; + Eterm iterator; + + budget = ELEMENTS_PER_RED * ERTS_BIF_REDS_LEFT(p); + iterator = *iterator_p; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + for (count = 0; count < budget && is_list(iterator); count++) { + iterator = CDR(list_val(iterator)); } - - if (m == n) /* All deleted ? */ - res = NIL; - else if (m == 0) /* None deleted ? */ - res = A; - else { /* REBUILD LIST */ - res = NIL; - need = 2*(n - m); - hp = HAlloc(p, need); - vp = vec_p + n - 1; - while(vp >= vec_p) { - if (is_value(*vp)) { - res = CONS(hp, *vp, res); - hp += 2; - } - vp--; - } + + if (!is_list(iterator) && !is_nil(iterator)) { + return -1; + } + + BUMP_REDS(p, count / ELEMENTS_PER_RED); + + *iterator_p = iterator; + *count_p += count; + + if (is_nil(iterator)) { + return 1; } - if (vec_p != small_vec) - erts_free(ERTS_ALC_T_TMP, (void *) vec_p); - BIF_RET(res); + + return 0; } -BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) -{ - return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2); +static int subtract_enter_naive_lhs(Process *p, ErtsSubtractContext *context) { + Eterm iterator; + int i = 0; + + context->stage = SUBTRACT_STAGE_NAIVE_LHS; + + context->iterator = context->rhs_original; + context->result = NIL; + + iterator = context->lhs_original; + + while (is_list(iterator)) { + const Eterm *cell = list_val(iterator); + + ASSERT(i < SUBTRACT_LHS_THRESHOLD); + + context->u.lhs_elements[i++] = CAR(cell); + iterator = CDR(cell); + } + + ASSERT(i == context->lhs_remaining); + + return subtract_continue(p, context); } -BIF_RETTYPE subtract_2(BIF_ALIST_2) -{ - return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2); +static int subtract_naive_lhs(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 16; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + + while (checks < budget && is_list(context->iterator)) { + const Eterm *cell; + Eterm value, next; + int found_at; + + cell = list_val(context->iterator); + + value = CAR(cell); + next = CDR(cell); + + for (found_at = 0; found_at < context->lhs_remaining; found_at++) { + if (EQ(value, context->u.lhs_elements[found_at])) { + /* We shift the array one step down as we have to preserve + * order. + * + * Note that we can't exit early as that would suppress errors + * in the right-hand side (this runs prior to determining the + * length of RHS). */ + + context->lhs_remaining--; + sys_memmove(&context->u.lhs_elements[found_at], + &context->u.lhs_elements[found_at + 1], + (context->lhs_remaining - found_at) * sizeof(Eterm)); + break; + } + } + + checks += MAX(1, context->lhs_remaining); + context->iterator = next; + } + + BUMP_REDS(p, MIN(checks, budget) / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + return 0; + } else if (!is_nil(context->iterator)) { + return -1; + } + + if (context->lhs_remaining > 0) { + Eterm *hp; + int i; + + hp = HAlloc(p, context->lhs_remaining * 2); + + for (i = context->lhs_remaining - 1; i >= 0; i--) { + Eterm value = context->u.lhs_elements[i]; + + context->result = CONS(hp, value, context->result); + hp += 2; + } + } + + ASSERT(context->lhs_remaining > 0 || context->result == NIL); + + return 1; +} + +static int subtract_enter_naive_rhs(Process *p, ErtsSubtractContext *context) { + Eterm iterator; + int i = 0; + + context->stage = SUBTRACT_STAGE_NAIVE_RHS; + + context->iterator = context->lhs_original; + context->result_cdr = &context->result; + context->result = NIL; + + iterator = context->rhs_original; + + while (is_list(iterator)) { + const Eterm *cell = list_val(iterator); + + ASSERT(i < SUBTRACT_RHS_THRESHOLD); + + context->u.rhs_elements[i++] = CAR(cell); + iterator = CDR(cell); + } + + ASSERT(i == context->rhs_remaining); + + return subtract_continue(p, context); +} + +static int subtract_naive_rhs(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 16; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (checks < budget && is_list(context->iterator)) { + const Eterm *cell; + Eterm value, next; + int found_at; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + for (found_at = context->rhs_remaining - 1; found_at >= 0; found_at--) { + if (EQ(value, context->u.rhs_elements[found_at])) { + break; + } + } + + if (found_at < 0) { + /* Destructively add the value to the result. This is safe + * since the GC is disabled and the unfinished term is never + * leaked to the outside world. */ + Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2); + + *context->result_cdr = make_list(hp); + context->result_cdr = &CDR(hp); + + CAR(hp) = value; + } else if (found_at >= 0) { + Eterm swap; + + if (context->rhs_remaining-- == 1) { + /* We've run out of items to remove, so the rest of the + * result will be equal to the remainder of the input. We know + * that LHS is well-formed as any errors would've been reported + * during length determination. */ + *context->result_cdr = next; + + BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED); + + return 1; + } + + swap = context->u.rhs_elements[context->rhs_remaining]; + context->u.rhs_elements[found_at] = swap; + } + + checks += context->rhs_remaining; + context->iterator = next; + context->lhs_remaining--; + } + + /* The result only has to be terminated when returning it to the user, but + * we're doing it when trapping as well to prevent headaches when + * debugging. */ + *context->result_cdr = NIL; + + BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0); + return 0; + } + + return 1; +} + +static int subtract_enter_set_build(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_SET_BUILD; + + context->u.rhs_set.alloc_start = + erts_alloc(ERTS_ALC_T_LIST_TRAP, + context->rhs_remaining * sizeof(subtract_tree_t)); + + context->u.rhs_set.alloc = context->u.rhs_set.alloc_start; + context->u.rhs_set.tree = NULL; + + context->iterator = context->rhs_original; + + return subtract_continue(p, context); +} + +static int subtract_set_build(Process *p, ErtsSubtractContext *context) { + const static Sint INSERTIONS_PER_RED = 16; + Sint budget, insertions; + + budget = INSERTIONS_PER_RED * ERTS_BIF_REDS_LEFT(p); + insertions = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (insertions < budget && is_list(context->iterator)) { + subtract_tree_t *existing_node, *new_node; + const Eterm *cell; + Eterm value, next; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + new_node = context->u.rhs_set.alloc; + new_node->key = value; + new_node->count = 1; + + existing_node = subtract_rbt_lookup_insert(&context->u.rhs_set.tree, + new_node); + + if (existing_node != NULL) { + existing_node->count++; + } else { + context->u.rhs_set.alloc++; + } + + context->iterator = next; + insertions++; + } + + BUMP_REDS(p, insertions / INSERTIONS_PER_RED); + + ASSERT(is_list(context->iterator) || is_nil(context->iterator)); + ASSERT(context->u.rhs_set.tree != NULL); + + return is_nil(context->iterator); +} + +static int subtract_enter_set_finish(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_SET_FINISH; + + context->result_cdr = &context->result; + context->result = NIL; + + context->iterator = context->lhs_original; + + return subtract_continue(p, context); +} + +static int subtract_set_finish(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 8; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (checks < budget && is_list(context->iterator)) { + subtract_tree_t *node; + const Eterm *cell; + Eterm value, next; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + ASSERT(context->rhs_remaining > 0); + + node = subtract_rbt_lookup(context->u.rhs_set.tree, value); + + if (node == NULL) { + Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2); + + *context->result_cdr = make_list(hp); + context->result_cdr = &CDR(hp); + + CAR(hp) = value; + } else { + if (context->rhs_remaining-- == 1) { + *context->result_cdr = next; + + BUMP_REDS(p, checks / CHECKS_PER_RED); + + return 1; + } + + if (node->count-- == 1) { + subtract_rbt_delete(&context->u.rhs_set.tree, node); + } + } + + context->iterator = next; + context->lhs_remaining--; + checks++; + } + + *context->result_cdr = NIL; + + BUMP_REDS(p, checks / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0); + return 0; + } + + return 1; +} + +static int subtract_continue(Process *p, ErtsSubtractContext *context) { + switch (context->stage) { + case SUBTRACT_STAGE_START: { + return subtract_enter_len_lhs(p, context); + } + + case SUBTRACT_STAGE_LEN_LHS: { + int res = subtract_get_length(p, + &context->iterator, + &context->lhs_remaining); + + if (res != 1) { + return res; + } + + if (context->lhs_remaining <= SUBTRACT_LHS_THRESHOLD) { + return subtract_enter_naive_lhs(p, context); + } + + return subtract_enter_len_rhs(p, context); + } + + case SUBTRACT_STAGE_NAIVE_LHS: { + return subtract_naive_lhs(p, context); + } + + case SUBTRACT_STAGE_LEN_RHS: { + int res = subtract_get_length(p, + &context->iterator, + &context->rhs_remaining); + + if (res != 1) { + return res; + } + + /* We've walked through both lists fully now so we no longer need + * to check for errors past this point. */ + + if (context->rhs_remaining <= SUBTRACT_RHS_THRESHOLD) { + return subtract_enter_naive_rhs(p, context); + } + + return subtract_enter_set_build(p, context); + } + + case SUBTRACT_STAGE_NAIVE_RHS: { + return subtract_naive_rhs(p, context); + } + + case SUBTRACT_STAGE_SET_BUILD: { + int res = subtract_set_build(p, context); + + if (res != 1) { + return res; + } + + return subtract_enter_set_finish(p, context); + } + + case SUBTRACT_STAGE_SET_FINISH: { + return subtract_set_finish(p, context); + } + + default: + ERTS_ASSERT(!"unreachable"); + } +} + +static int subtract_start(Process *p, Eterm lhs, Eterm rhs, + ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_START; + + context->lhs_original = lhs; + context->rhs_original = rhs; + + return subtract_continue(p, context); } +/* erlang:'--'/2 */ +static Eterm subtract(Export *bif_entry, BIF_ALIST_2) { + Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2; + + if ((is_list(lhs) || is_nil(lhs)) && (is_list(rhs) || is_nil(rhs))) { + /* We start with the context on the stack in the hopes that we won't + * have to trap. */ + ErtsSubtractContext context; + int res; + + res = subtract_start(BIF_P, lhs, rhs, &context); + + if (res == 0) { + Eterm state_mref; + + state_mref = subtract_create_trap_state(BIF_P, &context); + erts_set_gc_state(BIF_P, 0); + + BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL); + } + + subtract_ctx_dtor(&context); + + if (res < 0) { + BIF_ERROR(BIF_P, BADARG); + } + + BIF_RET(context.result); + } else if (is_internal_magic_ref(lhs)) { + ErtsSubtractContext *context; + int (*dtor)(Binary*); + Binary *magic_bin; + + int res; + + magic_bin = erts_magic_ref2bin(lhs); + dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin); + + if (dtor != subtract_ctx_bin_dtor) { + BIF_ERROR(BIF_P, BADARG); + } + + ASSERT(BIF_P->flags & F_DISABLE_GC); + ASSERT(rhs == NIL); + + context = ERTS_MAGIC_BIN_DATA(magic_bin); + res = subtract_continue(BIF_P, context); + + if (res == 0) { + BIF_TRAP2(bif_entry, BIF_P, lhs, NIL); + } + + erts_set_gc_state(BIF_P, 1); + + if (res < 0) { + ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry, + context->lhs_original, + context->rhs_original); + } + + BIF_RET(context->result); + } + + ASSERT(!(BIF_P->flags & F_DISABLE_GC)); + + BIF_ERROR(BIF_P, BADARG); +} + +BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) { + return subtract(bif_export[BIF_ebif_minusminus_2], BIF_CALL_ARGS); +} + +BIF_RETTYPE subtract_2(BIF_ALIST_2) { + return subtract(bif_export[BIF_subtract_2], BIF_CALL_ARGS); +} + + BIF_RETTYPE lists_member_2(BIF_ALIST_2) { Eterm term; diff --git a/lib/stdlib/doc/src/lists.xml b/lib/stdlib/doc/src/lists.xml index 7efafedc82..55227aaee5 100644 --- a/lib/stdlib/doc/src/lists.xml +++ b/lib/stdlib/doc/src/lists.xml @@ -838,14 +838,6 @@ splitwith(Pred, List) -> > <input>lists:subtract("123212", "212").</input> "312".</pre> <p><c>lists:subtract(A, B)</c> is equivalent to <c>A -- B</c>.</p> - <warning> - <p>The complexity of <c>lists:subtract(A, B)</c> is proportional to - <c>length(A)*length(B)</c>, meaning that it is very slow if both - <c>A</c> and <c>B</c> are long lists. (If both lists are long, it - is a much better choice to use ordered lists and - <seealso marker="ordsets#subtract/2"> - <c>ordsets:subtract/2</c></seealso>.</p> - </warning> </desc> </func> diff --git a/lib/stdlib/test/lists_SUITE.erl b/lib/stdlib/test/lists_SUITE.erl index 7c99244b36..c380b3bba1 100644 --- a/lib/stdlib/test/lists_SUITE.erl +++ b/lib/stdlib/test/lists_SUITE.erl @@ -2597,6 +2597,13 @@ subtract(Config) when is_list(Config) -> {'EXIT',_} = (catch sub([a|b], [])), {'EXIT',_} = (catch sub([a|b], [a])), + %% Trapping, both crashing and otherwise. + [sub_trapping(N) || N <- lists:seq(0, 18)], + + %% The current implementation chooses which algorithm to use based on + %% certain thresholds, and we need proper coverage for all corner cases. + [sub_thresholds(N) || N <- lists:seq(0, 32)], + ok. sub_non_matching(A, B) -> @@ -2606,6 +2613,41 @@ sub(A, B) -> Res = A -- B, Res = lists:subtract(A, B). +sub_trapping(N) -> + List = lists:duplicate(N + (1 bsl N), gurka), + ImproperList = List ++ crash, + + {'EXIT',_} = (catch sub_trapping_1(ImproperList, [])), + {'EXIT',_} = (catch sub_trapping_1(List, ImproperList)), + + List = List -- lists:duplicate(N + (1 bsl N), gaffel), + ok = sub_trapping_1(List, []). + +sub_trapping_1([], _) -> ok; +sub_trapping_1(L, R) -> sub_trapping_1(L -- R, [gurka | R]). + +sub_thresholds(N) -> + %% This needs to be long enough to cause trapping. + OtherLen = 1 bsl 18, + Other = lists:seq(0, OtherLen - 1), + + Disjoint = lists:seq(-N, -1), + Subset = lists:seq(1, N), + + %% LHS is disjoint from RHS, so all elements must be retained. + Disjoint = Disjoint -- Other, + + %% LHS is covered by RHS, so all elements must be removed. + [] = Subset -- Other, + + %% RHS is disjoint from LHS, so all elements must be retained. + Other = Other -- Disjoint, + + %% RHS is covered by LHS, so N elements must be removed. + N = OtherLen - length(Other -- Subset), + + ok. + %% Test lists:droplast/1 droplast(Config) when is_list(Config) -> [] = lists:droplast([x]), diff --git a/system/doc/efficiency_guide/commoncaveats.xml b/system/doc/efficiency_guide/commoncaveats.xml index b41ffc3902..367da09ba3 100644 --- a/system/doc/efficiency_guide/commoncaveats.xml +++ b/system/doc/efficiency_guide/commoncaveats.xml @@ -169,53 +169,5 @@ multiple_setelement(T0) -> {Bin1,Bin2} = split_binary(Bin, Num)</code> </section> - <section> - <title>Operator "--"</title> - <p>The "<c>--</c>" operator has a complexity - proportional to the product of the length of its operands. - This means that the operator is very slow if both of its operands - are long lists:</p> - - <p><em>DO NOT</em></p> - <code type="none"><![CDATA[ - HugeList1 -- HugeList2]]></code> - - <p>Instead use the <seealso marker="stdlib:ordsets">ordsets</seealso> - module in STDLIB:</p> - - <p><em>DO</em></p> - <code type="none"> - HugeSet1 = ordsets:from_list(HugeList1), - HugeSet2 = ordsets:from_list(HugeList2), - ordsets:subtract(HugeSet1, HugeSet2)</code> - - <p>Obviously, that code does not work if the original order - of the list is important. If the order of the list must be - preserved, do as follows:</p> - - <p><em>DO</em></p> - <code type="none"><![CDATA[ - Set = gb_sets:from_list(HugeList2), - [E || E <- HugeList1, not gb_sets:is_element(E, Set)]]]></code> - - <note><p>This code behaves differently from "<c>--</c>" - if the lists contain duplicate elements (one occurrence - of an element in HugeList2 removes <em>all</em> - occurrences in HugeList1.)</p> - <p>Also, this code compares lists elements using the - "<c>==</c>" operator, while "<c>--</c>" uses the "<c>=:=</c>" operator. - If that difference is important, <c>sets</c> can be used instead of - <c>gb_sets</c>, but <c>sets:from_list/1</c> is much - slower than <c>gb_sets:from_list/1</c> for long lists.</p></note> - - <p>Using the "<c>--</c>" operator to delete an element - from a list is not a performance problem:</p> - - <p><em>OK</em></p> - <code type="none"> - HugeList1 -- [Element]</code> - - </section> - </chapter> diff --git a/system/doc/efficiency_guide/retired_myths.xml b/system/doc/efficiency_guide/retired_myths.xml index 9b914a3b6e..144c942c2b 100644 --- a/system/doc/efficiency_guide/retired_myths.xml +++ b/system/doc/efficiency_guide/retired_myths.xml @@ -60,4 +60,18 @@ That leads us to the myth that tail-recursive functions are faster than body-recursive functions.</p> </section> + + <section> + <title>Myth: List subtraction ("--" operator) is slow</title> + + <p>List subtraction used to have a run-time complexity proportional to the + product of the length of its operands, so it was extremely slow when both + lists were long.</p> + + <p>As of OTP 22 the run-time complexity is "n log n" and the operation will + complete quickly even when both lists are very long. In fact, it is + faster and uses less memory than the commonly used workaround to convert + both lists to ordered sets before subtracting them with + <c>ordsets:subtract/2</c>.</p> + </section> </chapter> |