aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Högberg <[email protected]>2018-11-02 09:38:31 +0100
committerJohn Högberg <[email protected]>2018-11-02 09:38:31 +0100
commit7999ddad6121db7d1b7fe44b3c6a80a8d7ff70f3 (patch)
treece945f151c9c789bda1e54708815be5301b843a8
parentf9aa2fbb81470e9d58efe2cff7e394888c14a779 (diff)
parenteb9ee88f4cc640065f4902e270d834bfb596d5fc (diff)
downloadotp-7999ddad6121db7d1b7fe44b3c6a80a8d7ff70f3.tar.gz
otp-7999ddad6121db7d1b7fe44b3c6a80a8d7ff70f3.tar.bz2
otp-7999ddad6121db7d1b7fe44b3c6a80a8d7ff70f3.zip
Merge branch 'john/erts/minusminus_trapping/OTP-15371' into maint
* john/erts/minusminus_trapping/OTP-15371: Optimize operator '--' and yield on large inputs Inline erts_cmp Clarify a magical allocation size Fix trapping in lists:reverse/2
-rw-r--r--erts/emulator/beam/atom.c2
-rw-r--r--erts/emulator/beam/erl_alloc.types1
-rw-r--r--erts/emulator/beam/erl_bif_lists.c830
-rw-r--r--erts/emulator/beam/erl_utils.h61
-rw-r--r--erts/emulator/beam/external.c3
-rw-r--r--erts/emulator/beam/utils.c64
-rw-r--r--lib/stdlib/doc/src/lists.xml8
-rw-r--r--lib/stdlib/test/lists_SUITE.erl42
-rw-r--r--system/doc/efficiency_guide/commoncaveats.xml48
-rw-r--r--system/doc/efficiency_guide/retired_myths.xml14
10 files changed, 852 insertions, 221 deletions
diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c
index 5381611fab..59b51fd15e 100644
--- a/erts/emulator/beam/atom.c
+++ b/erts/emulator/beam/atom.c
@@ -174,7 +174,7 @@ atom_alloc(Atom* tmpl)
/*
* Precompute ordinal value of first 3 bytes + 7 bits.
- * This is used by utils.c:erts_cmp_atoms().
+ * This is used by erl_utils.h:erts_cmp_atoms().
* We cannot use the full 32 bits of the first 4 bytes,
* since we use the sign of the difference between two
* ordinal values to represent their relative order.
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 5409b89bab..98ba0e5f07 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -274,6 +274,7 @@ type ML_YIELD_STATE SHORT_LIVED SYSTEM monitor_link_yield_state
type ML_DIST STANDARD SYSTEM monitor_link_dist
type PF3_ARGS SHORT_LIVED PROCESSES process_flag_3_arguments
type SETUP_CONN_ARG SHORT_LIVED PROCESSES setup_connection_argument
+type LIST_TRAP SHORT_LIVED PROCESSES list_bif_trap_state
type ENVIRONMENT SYSTEM SYSTEM environment
diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c
index 395be67a90..a793b34852 100644
--- a/erts/emulator/beam/erl_bif_lists.c
+++ b/erts/emulator/beam/erl_bif_lists.c
@@ -29,12 +29,13 @@
#include "sys.h"
#include "erl_vm.h"
#include "global.h"
-#include "erl_process.h"
-#include "error.h"
#include "bif.h"
+#include "erl_binary.h"
+
static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List);
+
static BIF_RETTYPE append(Process* p, Eterm A, Eterm B)
{
Eterm list;
@@ -146,103 +147,724 @@ BIF_RETTYPE append_2(BIF_ALIST_2)
return append(BIF_P, BIF_ARG_1, BIF_ARG_2);
}
-/*
- * erlang:'--'/2
- */
+/* erlang:'--'/2
+ *
+ * Subtracts a list from another (LHS -- RHS), removing the first occurrence of
+ * each element in LHS from RHS. There is no type coercion so the elements must
+ * match exactly.
+ *
+ * The BIF is broken into several stages that can all trap individually, and it
+ * chooses its algorithm based on input size. If either input is small it will
+ * use a linear scan tuned to which side it's on, and if both inputs are large
+ * enough it will convert RHS into a multiset to provide good asymptotic
+ * behavior. */
+
+#define SUBTRACT_LHS_THRESHOLD 16
+#define SUBTRACT_RHS_THRESHOLD 16
+
+typedef enum {
+ SUBTRACT_STAGE_START,
+ SUBTRACT_STAGE_LEN_LHS,
+
+ /* Naive linear scan that's efficient when
+ * LEN_LHS <= SUBTRACT_LHS_THRESHOLD. */
+ SUBTRACT_STAGE_NAIVE_LHS,
+
+ SUBTRACT_STAGE_LEN_RHS,
+
+ /* As SUBTRACT_STAGE_NAIVE_LHS but for RHS. */
+ SUBTRACT_STAGE_NAIVE_RHS,
+
+ /* Creates a multiset from RHS for faster lookups before sweeping through
+ * LHS. The set is implemented as a red-black tree and duplicate elements
+ * are handled by a counter on each node. */
+ SUBTRACT_STAGE_SET_BUILD,
+ SUBTRACT_STAGE_SET_FINISH
+} ErtsSubtractCtxStage;
+
+typedef struct subtract_node__ {
+ struct subtract_node__ *parent;
+ struct subtract_node__ *left;
+ struct subtract_node__ *right;
+ int is_red;
+
+ Eterm key;
+ Uint count;
+} subtract_tree_t;
+
+typedef struct {
+ ErtsSubtractCtxStage stage;
+
+ Eterm lhs_original;
+ Eterm rhs_original;
+
+ Uint lhs_remaining;
+ Uint rhs_remaining;
+
+ Eterm iterator;
+
+ Eterm *result_cdr;
+ Eterm result;
+
+ union {
+ Eterm lhs_elements[SUBTRACT_LHS_THRESHOLD];
+ Eterm rhs_elements[SUBTRACT_RHS_THRESHOLD];
+
+ struct {
+ subtract_tree_t *tree;
+
+ /* A memory area for the tree's nodes, saving us the need to have
+ * one allocation per node. */
+ subtract_tree_t *alloc_start;
+ subtract_tree_t *alloc;
+ } rhs_set;
+ } u;
+} ErtsSubtractContext;
+
+#define ERTS_RBT_PREFIX subtract
+#define ERTS_RBT_T subtract_tree_t
+#define ERTS_RBT_KEY_T Eterm
+#define ERTS_RBT_FLAGS_T int
+#define ERTS_RBT_INIT_EMPTY_TNODE(T) \
+ do { \
+ (T)->parent = NULL; \
+ (T)->left = NULL; \
+ (T)->right = NULL; \
+ } while(0)
+#define ERTS_RBT_IS_RED(T) ((T)->is_red)
+#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1)
+#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T))
+#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0)
+#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red)
+#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F)
+#define ERTS_RBT_GET_PARENT(T) ((T)->parent)
+#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P)
+#define ERTS_RBT_GET_RIGHT(T) ((T)->right)
+#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R))
+#define ERTS_RBT_GET_LEFT(T) ((T)->left)
+#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L))
+#define ERTS_RBT_GET_KEY(T) ((T)->key)
+#define ERTS_RBT_CMP_KEYS(KX, KY) CMP_TERM(KX, KY)
+#define ERTS_RBT_WANT_LOOKUP_INSERT
+#define ERTS_RBT_WANT_LOOKUP
+#define ERTS_RBT_WANT_DELETE
+#define ERTS_RBT_UNDEF
+
+#include "erl_rbtree.h"
+
+static int subtract_continue(Process *p, ErtsSubtractContext *context);
+
+static void subtract_ctx_dtor(ErtsSubtractContext *context) {
+ switch (context->stage) {
+ case SUBTRACT_STAGE_SET_BUILD:
+ case SUBTRACT_STAGE_SET_FINISH:
+ erts_free(ERTS_ALC_T_LIST_TRAP, context->u.rhs_set.alloc_start);
+ break;
+ default:
+ break;
+ }
+}
-#define SMALL_VEC_SIZE 10
-static Eterm subtract(Process* p, Eterm A, Eterm B)
-{
- Eterm list;
- Eterm* hp;
- Uint need;
- Eterm res;
- Eterm small_vec[SMALL_VEC_SIZE]; /* Preallocated memory for small lists */
- Eterm* vec_p;
- Eterm* vp;
- Sint i;
- Sint n;
- Sint m;
-
- if ((n = erts_list_length(A)) < 0) {
- BIF_ERROR(p, BADARG);
+static int subtract_ctx_bin_dtor(Binary *context_bin) {
+ ErtsSubtractContext *context = ERTS_MAGIC_BIN_DATA(context_bin);
+ subtract_ctx_dtor(context);
+ return 1;
+}
+
+static void subtract_ctx_move(ErtsSubtractContext *from,
+ ErtsSubtractContext *to) {
+ int uses_result_cdr = 0;
+
+ to->stage = from->stage;
+
+ to->lhs_original = from->lhs_original;
+ to->rhs_original = from->rhs_original;
+
+ to->lhs_remaining = from->lhs_remaining;
+ to->rhs_remaining = from->rhs_remaining;
+
+ to->iterator = from->iterator;
+ to->result = from->result;
+
+ switch (to->stage) {
+ case SUBTRACT_STAGE_NAIVE_LHS:
+ sys_memcpy(to->u.lhs_elements,
+ from->u.lhs_elements,
+ sizeof(Eterm) * to->lhs_remaining);
+ break;
+ case SUBTRACT_STAGE_NAIVE_RHS:
+ sys_memcpy(to->u.rhs_elements,
+ from->u.rhs_elements,
+ sizeof(Eterm) * to->rhs_remaining);
+
+ uses_result_cdr = 1;
+ break;
+ case SUBTRACT_STAGE_SET_FINISH:
+ uses_result_cdr = 1;
+ /* FALL THROUGH */
+ case SUBTRACT_STAGE_SET_BUILD:
+ to->u.rhs_set.alloc_start = from->u.rhs_set.alloc_start;
+ to->u.rhs_set.alloc = from->u.rhs_set.alloc;
+ to->u.rhs_set.tree = from->u.rhs_set.tree;
+ break;
+ default:
+ break;
}
- if ((m = erts_list_length(B)) < 0) {
- BIF_ERROR(p, BADARG);
+
+ if (uses_result_cdr) {
+ if (from->result_cdr == &from->result) {
+ to->result_cdr = &to->result;
+ } else {
+ to->result_cdr = from->result_cdr;
+ }
}
-
- if (n == 0)
- BIF_RET(NIL);
- if (m == 0)
- BIF_RET(A);
-
- /* allocate element vector */
- if (n <= SMALL_VEC_SIZE)
- vec_p = small_vec;
- else
- vec_p = (Eterm*) erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm));
-
- /* PUT ALL ELEMENTS IN VP */
- vp = vec_p;
- list = A;
- i = n;
- while(i--) {
- Eterm* listp = list_val(list);
- *vp++ = CAR(listp);
- list = CDR(listp);
+}
+
+static Eterm subtract_create_trap_state(Process *p,
+ ErtsSubtractContext *context) {
+ Binary *state_bin;
+ Eterm *hp;
+
+ state_bin = erts_create_magic_binary(sizeof(ErtsSubtractContext),
+ subtract_ctx_bin_dtor);
+
+ subtract_ctx_move(context, ERTS_MAGIC_BIN_DATA(state_bin));
+
+ hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+
+ return erts_mk_magic_ref(&hp, &MSO(p), state_bin);
+}
+
+static int subtract_enter_len_lhs(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_LEN_LHS;
+
+ context->iterator = context->lhs_original;
+ context->lhs_remaining = 0;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_enter_len_rhs(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_LEN_RHS;
+
+ context->iterator = context->rhs_original;
+ context->rhs_remaining = 0;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_get_length(Process *p, Eterm *iterator_p, Uint *count_p) {
+ static const Sint ELEMENTS_PER_RED = 32;
+
+ Sint budget, count;
+ Eterm iterator;
+
+ budget = ELEMENTS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ iterator = *iterator_p;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ for (count = 0; count < budget && is_list(iterator); count++) {
+ iterator = CDR(list_val(iterator));
}
-
- /* UNMARK ALL DELETED CELLS */
- list = B;
- m = 0; /* number of deleted elements */
- while(is_list(list)) {
- Eterm* listp = list_val(list);
- Eterm elem = CAR(listp);
- i = n;
- vp = vec_p;
- while(i--) {
- if (is_value(*vp) && eq(*vp, elem)) {
- *vp = THE_NON_VALUE;
- m++;
- break;
- }
- vp++;
- }
- list = CDR(listp);
+
+ if (!is_list(iterator) && !is_nil(iterator)) {
+ return -1;
}
-
- if (m == n) /* All deleted ? */
- res = NIL;
- else if (m == 0) /* None deleted ? */
- res = A;
- else { /* REBUILD LIST */
- res = NIL;
- need = 2*(n - m);
- hp = HAlloc(p, need);
- vp = vec_p + n - 1;
- while(vp >= vec_p) {
- if (is_value(*vp)) {
- res = CONS(hp, *vp, res);
- hp += 2;
- }
- vp--;
- }
+
+ BUMP_REDS(p, count / ELEMENTS_PER_RED);
+
+ *iterator_p = iterator;
+ *count_p += count;
+
+ if (is_nil(iterator)) {
+ return 1;
}
- if (vec_p != small_vec)
- erts_free(ERTS_ALC_T_TMP, (void *) vec_p);
- BIF_RET(res);
+
+ return 0;
}
-BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2)
-{
- return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2);
+static int subtract_enter_naive_lhs(Process *p, ErtsSubtractContext *context) {
+ Eterm iterator;
+ int i = 0;
+
+ context->stage = SUBTRACT_STAGE_NAIVE_LHS;
+
+ context->iterator = context->rhs_original;
+ context->result = NIL;
+
+ iterator = context->lhs_original;
+
+ while (is_list(iterator)) {
+ const Eterm *cell = list_val(iterator);
+
+ ASSERT(i < SUBTRACT_LHS_THRESHOLD);
+
+ context->u.lhs_elements[i++] = CAR(cell);
+ iterator = CDR(cell);
+ }
+
+ ASSERT(i == context->lhs_remaining);
+
+ return subtract_continue(p, context);
}
-BIF_RETTYPE subtract_2(BIF_ALIST_2)
-{
- return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2);
+static int subtract_naive_lhs(Process *p, ErtsSubtractContext *context) {
+ const Sint CHECKS_PER_RED = 16;
+ Sint checks, budget;
+
+ budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ checks = 0;
+
+ while (checks < budget && is_list(context->iterator)) {
+ const Eterm *cell;
+ Eterm value, next;
+ int found_at;
+
+ cell = list_val(context->iterator);
+
+ value = CAR(cell);
+ next = CDR(cell);
+
+ for (found_at = 0; found_at < context->lhs_remaining; found_at++) {
+ if (EQ(value, context->u.lhs_elements[found_at])) {
+ /* We shift the array one step down as we have to preserve
+ * order.
+ *
+ * Note that we can't exit early as that would suppress errors
+ * in the right-hand side (this runs prior to determining the
+ * length of RHS). */
+
+ context->lhs_remaining--;
+ sys_memmove(&context->u.lhs_elements[found_at],
+ &context->u.lhs_elements[found_at + 1],
+ (context->lhs_remaining - found_at) * sizeof(Eterm));
+ break;
+ }
+ }
+
+ checks += MAX(1, context->lhs_remaining);
+ context->iterator = next;
+ }
+
+ BUMP_REDS(p, MIN(checks, budget) / CHECKS_PER_RED);
+
+ if (is_list(context->iterator)) {
+ return 0;
+ } else if (!is_nil(context->iterator)) {
+ return -1;
+ }
+
+ if (context->lhs_remaining > 0) {
+ Eterm *hp;
+ int i;
+
+ hp = HAlloc(p, context->lhs_remaining * 2);
+
+ for (i = context->lhs_remaining - 1; i >= 0; i--) {
+ Eterm value = context->u.lhs_elements[i];
+
+ context->result = CONS(hp, value, context->result);
+ hp += 2;
+ }
+ }
+
+ ASSERT(context->lhs_remaining > 0 || context->result == NIL);
+
+ return 1;
+}
+
+static int subtract_enter_naive_rhs(Process *p, ErtsSubtractContext *context) {
+ Eterm iterator;
+ int i = 0;
+
+ context->stage = SUBTRACT_STAGE_NAIVE_RHS;
+
+ context->iterator = context->lhs_original;
+ context->result_cdr = &context->result;
+ context->result = NIL;
+
+ iterator = context->rhs_original;
+
+ while (is_list(iterator)) {
+ const Eterm *cell = list_val(iterator);
+
+ ASSERT(i < SUBTRACT_RHS_THRESHOLD);
+
+ context->u.rhs_elements[i++] = CAR(cell);
+ iterator = CDR(cell);
+ }
+
+ ASSERT(i == context->rhs_remaining);
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_naive_rhs(Process *p, ErtsSubtractContext *context) {
+ const Sint CHECKS_PER_RED = 16;
+ Sint checks, budget;
+
+ budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ checks = 0;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ while (checks < budget && is_list(context->iterator)) {
+ const Eterm *cell;
+ Eterm value, next;
+ int found_at;
+
+ cell = list_val(context->iterator);
+ value = CAR(cell);
+ next = CDR(cell);
+
+ for (found_at = context->rhs_remaining - 1; found_at >= 0; found_at--) {
+ if (EQ(value, context->u.rhs_elements[found_at])) {
+ break;
+ }
+ }
+
+ if (found_at < 0) {
+ /* Destructively add the value to the result. This is safe
+ * since the GC is disabled and the unfinished term is never
+ * leaked to the outside world. */
+ Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2);
+
+ *context->result_cdr = make_list(hp);
+ context->result_cdr = &CDR(hp);
+
+ CAR(hp) = value;
+ } else if (found_at >= 0) {
+ Eterm swap;
+
+ if (context->rhs_remaining-- == 1) {
+ /* We've run out of items to remove, so the rest of the
+ * result will be equal to the remainder of the input. We know
+ * that LHS is well-formed as any errors would've been reported
+ * during length determination. */
+ *context->result_cdr = next;
+
+ BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED);
+
+ return 1;
+ }
+
+ swap = context->u.rhs_elements[context->rhs_remaining];
+ context->u.rhs_elements[found_at] = swap;
+ }
+
+ checks += context->rhs_remaining;
+ context->iterator = next;
+ context->lhs_remaining--;
+ }
+
+ /* The result only has to be terminated when returning it to the user, but
+ * we're doing it when trapping as well to prevent headaches when
+ * debugging. */
+ *context->result_cdr = NIL;
+
+ BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED);
+
+ if (is_list(context->iterator)) {
+ ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0);
+ return 0;
+ }
+
+ return 1;
}
+static int subtract_enter_set_build(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_SET_BUILD;
+
+ context->u.rhs_set.alloc_start =
+ erts_alloc(ERTS_ALC_T_LIST_TRAP,
+ context->rhs_remaining * sizeof(subtract_tree_t));
+
+ context->u.rhs_set.alloc = context->u.rhs_set.alloc_start;
+ context->u.rhs_set.tree = NULL;
+
+ context->iterator = context->rhs_original;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_set_build(Process *p, ErtsSubtractContext *context) {
+ const static Sint INSERTIONS_PER_RED = 16;
+ Sint budget, insertions;
+
+ budget = INSERTIONS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ insertions = 0;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ while (insertions < budget && is_list(context->iterator)) {
+ subtract_tree_t *existing_node, *new_node;
+ const Eterm *cell;
+ Eterm value, next;
+
+ cell = list_val(context->iterator);
+ value = CAR(cell);
+ next = CDR(cell);
+
+ new_node = context->u.rhs_set.alloc;
+ new_node->key = value;
+ new_node->count = 1;
+
+ existing_node = subtract_rbt_lookup_insert(&context->u.rhs_set.tree,
+ new_node);
+
+ if (existing_node != NULL) {
+ existing_node->count++;
+ } else {
+ context->u.rhs_set.alloc++;
+ }
+
+ context->iterator = next;
+ insertions++;
+ }
+
+ BUMP_REDS(p, insertions / INSERTIONS_PER_RED);
+
+ ASSERT(is_list(context->iterator) || is_nil(context->iterator));
+ ASSERT(context->u.rhs_set.tree != NULL);
+
+ return is_nil(context->iterator);
+}
+
+static int subtract_enter_set_finish(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_SET_FINISH;
+
+ context->result_cdr = &context->result;
+ context->result = NIL;
+
+ context->iterator = context->lhs_original;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_set_finish(Process *p, ErtsSubtractContext *context) {
+ const Sint CHECKS_PER_RED = 8;
+ Sint checks, budget;
+
+ budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ checks = 0;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ while (checks < budget && is_list(context->iterator)) {
+ subtract_tree_t *node;
+ const Eterm *cell;
+ Eterm value, next;
+
+ cell = list_val(context->iterator);
+ value = CAR(cell);
+ next = CDR(cell);
+
+ ASSERT(context->rhs_remaining > 0);
+
+ node = subtract_rbt_lookup(context->u.rhs_set.tree, value);
+
+ if (node == NULL) {
+ Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2);
+
+ *context->result_cdr = make_list(hp);
+ context->result_cdr = &CDR(hp);
+
+ CAR(hp) = value;
+ } else {
+ if (context->rhs_remaining-- == 1) {
+ *context->result_cdr = next;
+
+ BUMP_REDS(p, checks / CHECKS_PER_RED);
+
+ return 1;
+ }
+
+ if (node->count-- == 1) {
+ subtract_rbt_delete(&context->u.rhs_set.tree, node);
+ }
+ }
+
+ context->iterator = next;
+ context->lhs_remaining--;
+ checks++;
+ }
+
+ *context->result_cdr = NIL;
+
+ BUMP_REDS(p, checks / CHECKS_PER_RED);
+
+ if (is_list(context->iterator)) {
+ ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int subtract_continue(Process *p, ErtsSubtractContext *context) {
+ switch (context->stage) {
+ case SUBTRACT_STAGE_START: {
+ return subtract_enter_len_lhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_LEN_LHS: {
+ int res = subtract_get_length(p,
+ &context->iterator,
+ &context->lhs_remaining);
+
+ if (res != 1) {
+ return res;
+ }
+
+ if (context->lhs_remaining <= SUBTRACT_LHS_THRESHOLD) {
+ return subtract_enter_naive_lhs(p, context);
+ }
+
+ return subtract_enter_len_rhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_NAIVE_LHS: {
+ return subtract_naive_lhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_LEN_RHS: {
+ int res = subtract_get_length(p,
+ &context->iterator,
+ &context->rhs_remaining);
+
+ if (res != 1) {
+ return res;
+ }
+
+ /* We've walked through both lists fully now so we no longer need
+ * to check for errors past this point. */
+
+ if (context->rhs_remaining <= SUBTRACT_RHS_THRESHOLD) {
+ return subtract_enter_naive_rhs(p, context);
+ }
+
+ return subtract_enter_set_build(p, context);
+ }
+
+ case SUBTRACT_STAGE_NAIVE_RHS: {
+ return subtract_naive_rhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_SET_BUILD: {
+ int res = subtract_set_build(p, context);
+
+ if (res != 1) {
+ return res;
+ }
+
+ return subtract_enter_set_finish(p, context);
+ }
+
+ case SUBTRACT_STAGE_SET_FINISH: {
+ return subtract_set_finish(p, context);
+ }
+
+ default:
+ ERTS_ASSERT(!"unreachable");
+ }
+}
+
+static int subtract_start(Process *p, Eterm lhs, Eterm rhs,
+ ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_START;
+
+ context->lhs_original = lhs;
+ context->rhs_original = rhs;
+
+ return subtract_continue(p, context);
+}
+
+/* erlang:'--'/2 */
+static Eterm subtract(Export *bif_entry, BIF_ALIST_2) {
+ Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2;
+
+ if ((is_list(lhs) || is_nil(lhs)) && (is_list(rhs) || is_nil(rhs))) {
+ /* We start with the context on the stack in the hopes that we won't
+ * have to trap. */
+ ErtsSubtractContext context;
+ int res;
+
+ res = subtract_start(BIF_P, lhs, rhs, &context);
+
+ if (res == 0) {
+ Eterm state_mref;
+
+ state_mref = subtract_create_trap_state(BIF_P, &context);
+ erts_set_gc_state(BIF_P, 0);
+
+ BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL);
+ }
+
+ subtract_ctx_dtor(&context);
+
+ if (res < 0) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ BIF_RET(context.result);
+ } else if (is_internal_magic_ref(lhs)) {
+ ErtsSubtractContext *context;
+ int (*dtor)(Binary*);
+ Binary *magic_bin;
+
+ int res;
+
+ magic_bin = erts_magic_ref2bin(lhs);
+ dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin);
+
+ if (dtor != subtract_ctx_bin_dtor) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ ASSERT(BIF_P->flags & F_DISABLE_GC);
+ ASSERT(rhs == NIL);
+
+ context = ERTS_MAGIC_BIN_DATA(magic_bin);
+ res = subtract_continue(BIF_P, context);
+
+ if (res == 0) {
+ BIF_TRAP2(bif_entry, BIF_P, lhs, NIL);
+ }
+
+ erts_set_gc_state(BIF_P, 1);
+
+ if (res < 0) {
+ ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry,
+ context->lhs_original,
+ context->rhs_original);
+ }
+
+ BIF_RET(context->result);
+ }
+
+ ASSERT(!(BIF_P->flags & F_DISABLE_GC));
+
+ BIF_ERROR(BIF_P, BADARG);
+}
+
+BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) {
+ return subtract(bif_export[BIF_ebif_minusminus_2], BIF_CALL_ARGS);
+}
+
+BIF_RETTYPE subtract_2(BIF_ALIST_2) {
+ return subtract(bif_export[BIF_subtract_2], BIF_CALL_ARGS);
+}
+
+
BIF_RETTYPE lists_member_2(BIF_ALIST_2)
{
Eterm term;
@@ -283,7 +905,7 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
{
static const Uint CELLS_PER_RED = 40;
- Eterm *heap_top, *heap_end;
+ Eterm *alloc_top, *alloc_end;
Uint cells_left, max_cells;
Eterm list, tail;
Eterm lookahead;
@@ -305,18 +927,18 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
BIF_ERROR(c_p, BADARG);
}
- heap_top = HAlloc(c_p, 2 * (max_cells - cells_left));
- heap_end = heap_top + 2 * (max_cells - cells_left);
+ alloc_top = HAlloc(c_p, 2 * (max_cells - cells_left));
+ alloc_end = alloc_top + 2 * (max_cells - cells_left);
- while (heap_top < heap_end) {
+ while (alloc_top < alloc_end) {
Eterm *pair = list_val(list);
- tail = CONS(heap_top, CAR(pair), tail);
+ tail = CONS(alloc_top, CAR(pair), tail);
list = CDR(pair);
ASSERT(is_list(list) || is_nil(list));
- heap_top += 2;
+ alloc_top += 2;
}
if (is_nil(list)) {
@@ -333,7 +955,7 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p,
{
static const Uint CELLS_PER_RED = 60;
- Eterm *heap_top, *heap_end;
+ Eterm *alloc_start, *alloc_top, *alloc_end;
Uint cells_left, max_cells;
Eterm list, tail;
@@ -343,21 +965,27 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p,
cells_left = max_cells = CELLS_PER_RED * (1 + ERTS_BIF_REDS_LEFT(c_p));
ASSERT(HEAP_LIMIT(c_p) >= HEAP_TOP(c_p) + 2);
- heap_end = HEAP_LIMIT(c_p) - 2;
- heap_top = HEAP_TOP(c_p);
+ alloc_start = HEAP_TOP(c_p);
+ alloc_end = HEAP_LIMIT(c_p) - 2;
+ alloc_top = alloc_start;
+
+ /* Don't process more cells than we have reductions for. */
+ alloc_end = MIN(alloc_top + (cells_left * 2), alloc_end);
- while (heap_top < heap_end && is_list(list)) {
+ while (alloc_top < alloc_end && is_list(list)) {
Eterm *pair = list_val(list);
- tail = CONS(heap_top, CAR(pair), tail);
+ tail = CONS(alloc_top, CAR(pair), tail);
list = CDR(pair);
- heap_top += 2;
+ alloc_top += 2;
}
- cells_left -= (heap_top - heap_end) / 2;
+ cells_left -= (alloc_top - alloc_start) / 2;
+ HEAP_TOP(c_p) = alloc_top;
+
+ ASSERT(cells_left >= 0 && cells_left <= max_cells);
BUMP_REDS(c_p, (max_cells - cells_left) / CELLS_PER_RED);
- HEAP_TOP(c_p) = heap_top;
if (is_nil(list)) {
BIF_RET(tail);
diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h
index b3bfa69052..880febba8b 100644
--- a/erts/emulator/beam/erl_utils.h
+++ b/erts/emulator/beam/erl_utils.h
@@ -22,6 +22,7 @@
#define ERL_UTILS_H__
#include "sys.h"
+#include "atom.h"
#include "erl_printf.h"
struct process;
@@ -112,10 +113,12 @@ int eq(Eterm, Eterm);
#define EQ(x,y) (((x) == (y)) || (is_not_both_immed((x),(y)) && eq((x),(y))))
-int erts_cmp_atoms(Eterm a, Eterm b);
-Sint erts_cmp(Eterm, Eterm, int, int);
-Sint erts_cmp_compound(Eterm, Eterm, int, int);
+ERTS_GLB_INLINE Sint erts_cmp(Eterm, Eterm, int, int);
+ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b);
+
Sint cmp(Eterm a, Eterm b);
+Sint erts_cmp_compound(Eterm, Eterm, int, int);
+
#define CMP(A,B) erts_cmp(A,B,0,0)
#define CMP_TERM(A,B) erts_cmp(A,B,1,0)
#define CMP_EQ_ONLY(A,B) erts_cmp(A,B,0,1)
@@ -150,4 +153,56 @@ Sint cmp(Eterm a, Eterm b);
if (erts_cmp_compound(X,Y,0,EqOnly) Op 0) { Action; }; \
}
+#define erts_float_comp(x,y) (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1))
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) {
+ Atom *aa = atom_tab(atom_val(a));
+ Atom *bb = atom_tab(atom_val(b));
+
+ byte *name_a, *name_b;
+ int len_a, len_b, diff;
+
+ diff = aa->ord0 - bb->ord0;
+
+ if (diff != 0) {
+ return diff;
+ }
+
+ name_a = &aa->name[3];
+ name_b = &bb->name[3];
+ len_a = aa->len-3;
+ len_b = bb->len-3;
+
+ if (len_a > 0 && len_b > 0) {
+ diff = sys_memcmp(name_a, name_b, MIN(len_a, len_b));
+
+ if (diff != 0) {
+ return diff;
+ }
+ }
+
+ return len_a - len_b;
+}
+
+ERTS_GLB_INLINE Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only) {
+ if (is_atom(a) && is_atom(b)) {
+ return erts_cmp_atoms(a, b);
+ } else if (is_both_small(a, b)) {
+ return (signed_val(a) - signed_val(b));
+ } else if (is_float(a) && is_float(b)) {
+ FloatDef af, bf;
+
+ GET_DOUBLE(a, af);
+ GET_DOUBLE(b, bf);
+
+ return erts_float_comp(af.fd, bf.fd);
+ }
+
+ return erts_cmp_compound(a,b,exact,eq_only);
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
#endif
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index 621ba108ba..9a66e491f3 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -1953,7 +1953,8 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla
#define RETURN_STATE() \
do { \
- hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE+3); \
+ static const int TUPLE2_SIZE = 2 + 1; \
+ hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE + TUPLE2_SIZE); \
c_term = erts_mk_magic_ref(&hp, &MSO(p), context_b); \
res = TUPLE2(hp, Term, c_term); \
BUMP_ALL_REDS(p); \
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index 08f8ca9788..d81bd89a48 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -2615,27 +2615,6 @@ not_equal:
}
-/*
- * Lexically compare two strings of bytes (string s1 length l1 and s2 l2).
- *
- * s1 < s2 return -1
- * s1 = s2 return 0
- * s1 > s2 return +1
- */
-static int cmpbytes(byte *s1, int l1, byte *s2, int l2)
-{
- int i;
- i = 0;
- while((i < l1) && (i < l2)) {
- if (s1[i] < s2[i]) return(-1);
- if (s1[i] > s2[i]) return(1);
- i++;
- }
- if (l1 < l2) return(-1);
- if (l1 > l2) return(1);
- return(0);
-}
-
/*
* Compare objects.
@@ -2649,20 +2628,6 @@ static int cmpbytes(byte *s1, int l1, byte *s2, int l2)
*
*/
-
-#define float_comp(x,y) (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1))
-
-int erts_cmp_atoms(Eterm a, Eterm b)
-{
- Atom *aa = atom_tab(atom_val(a));
- Atom *bb = atom_tab(atom_val(b));
- int diff = aa->ord0 - bb->ord0;
- if (diff)
- return diff;
- return cmpbytes(aa->name+3, aa->len-3,
- bb->name+3, bb->len-3);
-}
-
/* cmp(Eterm a, Eterm b)
* For compatibility with HiPE - arith-based compare.
*/
@@ -2673,22 +2638,6 @@ Sint cmp(Eterm a, Eterm b)
Sint erts_cmp_compound(Eterm a, Eterm b, int exact, int eq_only);
-Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only)
-{
- if (is_atom(a) && is_atom(b)) {
- return erts_cmp_atoms(a, b);
- } else if (is_both_small(a, b)) {
- return (signed_val(a) - signed_val(b));
- } else if (is_float(a) && is_float(b)) {
- FloatDef af, bf;
- GET_DOUBLE(a, af);
- GET_DOUBLE(b, bf);
- return float_comp(af.fd, bf.fd);
- }
- return erts_cmp_compound(a,b,exact,eq_only);
-}
-
-
/* erts_cmp(Eterm a, Eterm b, int exact)
* exact = 1 -> term-based compare
* exact = 0 -> arith-based compare
@@ -2985,7 +2934,7 @@ tailrecur_ne:
GET_DOUBLE(a, af);
GET_DOUBLE(b, bf);
- ON_CMP_GOTO(float_comp(af.fd, bf.fd));
+ ON_CMP_GOTO(erts_float_comp(af.fd, bf.fd));
}
case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
@@ -3022,10 +2971,7 @@ tailrecur_ne:
ErlFunThing* f2 = (ErlFunThing *) fun_val(b);
Sint diff;
- diff = cmpbytes(atom_tab(atom_val(f1->fe->module))->name,
- atom_tab(atom_val(f1->fe->module))->len,
- atom_tab(atom_val(f2->fe->module))->name,
- atom_tab(atom_val(f2->fe->module))->len);
+ diff = erts_cmp_atoms((f1->fe)->module, (f2->fe)->module);
if (diff != 0) {
RETURN_NEQ(diff);
}
@@ -3219,7 +3165,7 @@ tailrecur_ne:
if (f2.fd < MAX_LOSSLESS_FLOAT && f2.fd > MIN_LOSSLESS_FLOAT) {
/* Float is within the no loss limit */
f1.fd = signed_val(aw);
- j = float_comp(f1.fd, f2.fd);
+ j = erts_float_comp(f1.fd, f2.fd);
}
#if ERTS_SIZEOF_ETERM == 8
else if (f2.fd > (double) (MAX_SMALL + 1)) {
@@ -3266,7 +3212,7 @@ tailrecur_ne:
if (big_to_double(aw, &f1.fd) < 0) {
j = big_sign(aw) ? -1 : 1;
} else {
- j = float_comp(f1.fd, f2.fd);
+ j = erts_float_comp(f1.fd, f2.fd);
}
} else {
big = double_to_big(f2.fd, big_buf, sizeof(big_buf)/sizeof(Eterm));
@@ -3282,7 +3228,7 @@ tailrecur_ne:
if (f1.fd < MAX_LOSSLESS_FLOAT && f1.fd > MIN_LOSSLESS_FLOAT) {
/* Float is within the no loss limit */
f2.fd = signed_val(bw);
- j = float_comp(f1.fd, f2.fd);
+ j = erts_float_comp(f1.fd, f2.fd);
}
#if ERTS_SIZEOF_ETERM == 8
else if (f1.fd > (double) (MAX_SMALL + 1)) {
diff --git a/lib/stdlib/doc/src/lists.xml b/lib/stdlib/doc/src/lists.xml
index c3d5d7e07a..e4215a5336 100644
--- a/lib/stdlib/doc/src/lists.xml
+++ b/lib/stdlib/doc/src/lists.xml
@@ -850,14 +850,6 @@ splitwith(Pred, List) ->
> <input>lists:subtract("123212", "212").</input>
"312".</pre>
<p><c>lists:subtract(A, B)</c> is equivalent to <c>A -- B</c>.</p>
- <warning>
- <p>The complexity of <c>lists:subtract(A, B)</c> is proportional to
- <c>length(A)*length(B)</c>, meaning that it is very slow if both
- <c>A</c> and <c>B</c> are long lists. (If both lists are long, it
- is a much better choice to use ordered lists and
- <seealso marker="ordsets#subtract/2">
- <c>ordsets:subtract/2</c></seealso>.</p>
- </warning>
</desc>
</func>
diff --git a/lib/stdlib/test/lists_SUITE.erl b/lib/stdlib/test/lists_SUITE.erl
index 837ab4e97e..9a94bcc012 100644
--- a/lib/stdlib/test/lists_SUITE.erl
+++ b/lib/stdlib/test/lists_SUITE.erl
@@ -2597,6 +2597,13 @@ subtract(Config) when is_list(Config) ->
{'EXIT',_} = (catch sub([a|b], [])),
{'EXIT',_} = (catch sub([a|b], [a])),
+ %% Trapping, both crashing and otherwise.
+ [sub_trapping(N) || N <- lists:seq(0, 18)],
+
+ %% The current implementation chooses which algorithm to use based on
+ %% certain thresholds, and we need proper coverage for all corner cases.
+ [sub_thresholds(N) || N <- lists:seq(0, 32)],
+
ok.
sub_non_matching(A, B) ->
@@ -2606,6 +2613,41 @@ sub(A, B) ->
Res = A -- B,
Res = lists:subtract(A, B).
+sub_trapping(N) ->
+ List = lists:duplicate(N + (1 bsl N), gurka),
+ ImproperList = List ++ crash,
+
+ {'EXIT',_} = (catch sub_trapping_1(ImproperList, [])),
+ {'EXIT',_} = (catch sub_trapping_1(List, ImproperList)),
+
+ List = List -- lists:duplicate(N + (1 bsl N), gaffel),
+ ok = sub_trapping_1(List, []).
+
+sub_trapping_1([], _) -> ok;
+sub_trapping_1(L, R) -> sub_trapping_1(L -- R, [gurka | R]).
+
+sub_thresholds(N) ->
+ %% This needs to be long enough to cause trapping.
+ OtherLen = 1 bsl 18,
+ Other = lists:seq(0, OtherLen - 1),
+
+ Disjoint = lists:seq(-N, -1),
+ Subset = lists:seq(1, N),
+
+ %% LHS is disjoint from RHS, so all elements must be retained.
+ Disjoint = Disjoint -- Other,
+
+ %% LHS is covered by RHS, so all elements must be removed.
+ [] = Subset -- Other,
+
+ %% RHS is disjoint from LHS, so all elements must be retained.
+ Other = Other -- Disjoint,
+
+ %% RHS is covered by LHS, so N elements must be removed.
+ N = OtherLen - length(Other -- Subset),
+
+ ok.
+
%% Test lists:droplast/1
droplast(Config) when is_list(Config) ->
[] = lists:droplast([x]),
diff --git a/system/doc/efficiency_guide/commoncaveats.xml b/system/doc/efficiency_guide/commoncaveats.xml
index b41ffc3902..367da09ba3 100644
--- a/system/doc/efficiency_guide/commoncaveats.xml
+++ b/system/doc/efficiency_guide/commoncaveats.xml
@@ -169,53 +169,5 @@ multiple_setelement(T0) ->
{Bin1,Bin2} = split_binary(Bin, Num)</code>
</section>
- <section>
- <title>Operator "--"</title>
- <p>The "<c>--</c>" operator has a complexity
- proportional to the product of the length of its operands.
- This means that the operator is very slow if both of its operands
- are long lists:</p>
-
- <p><em>DO NOT</em></p>
- <code type="none"><![CDATA[
- HugeList1 -- HugeList2]]></code>
-
- <p>Instead use the <seealso marker="stdlib:ordsets">ordsets</seealso>
- module in STDLIB:</p>
-
- <p><em>DO</em></p>
- <code type="none">
- HugeSet1 = ordsets:from_list(HugeList1),
- HugeSet2 = ordsets:from_list(HugeList2),
- ordsets:subtract(HugeSet1, HugeSet2)</code>
-
- <p>Obviously, that code does not work if the original order
- of the list is important. If the order of the list must be
- preserved, do as follows:</p>
-
- <p><em>DO</em></p>
- <code type="none"><![CDATA[
- Set = gb_sets:from_list(HugeList2),
- [E || E <- HugeList1, not gb_sets:is_element(E, Set)]]]></code>
-
- <note><p>This code behaves differently from "<c>--</c>"
- if the lists contain duplicate elements (one occurrence
- of an element in HugeList2 removes <em>all</em>
- occurrences in HugeList1.)</p>
- <p>Also, this code compares lists elements using the
- "<c>==</c>" operator, while "<c>--</c>" uses the "<c>=:=</c>" operator.
- If that difference is important, <c>sets</c> can be used instead of
- <c>gb_sets</c>, but <c>sets:from_list/1</c> is much
- slower than <c>gb_sets:from_list/1</c> for long lists.</p></note>
-
- <p>Using the "<c>--</c>" operator to delete an element
- from a list is not a performance problem:</p>
-
- <p><em>OK</em></p>
- <code type="none">
- HugeList1 -- [Element]</code>
-
- </section>
-
</chapter>
diff --git a/system/doc/efficiency_guide/retired_myths.xml b/system/doc/efficiency_guide/retired_myths.xml
index 9b914a3b6e..144c942c2b 100644
--- a/system/doc/efficiency_guide/retired_myths.xml
+++ b/system/doc/efficiency_guide/retired_myths.xml
@@ -60,4 +60,18 @@
That leads us to the myth that tail-recursive functions are faster
than body-recursive functions.</p>
</section>
+
+ <section>
+ <title>Myth: List subtraction ("--" operator) is slow</title>
+
+ <p>List subtraction used to have a run-time complexity proportional to the
+ product of the length of its operands, so it was extremely slow when both
+ lists were long.</p>
+
+ <p>As of OTP 22 the run-time complexity is "n log n" and the operation will
+ complete quickly even when both lists are very long. In fact, it is
+ faster and uses less memory than the commonly used workaround to convert
+ both lists to ordered sets before subtracting them with
+ <c>ordsets:subtract/2</c>.</p>
+ </section>
</chapter>