18 files changed, 859 insertions, 150 deletions
diff --git a/OTP_VERSION b/OTP_VERSION
index 7e32f799c0..bdfd396d4a 100644
--- a/OTP_VERSION
+++ b/OTP_VERSION
@@ -1 +1 @@
-20.3.8.11
+20.3.8.13
diff --git a/erts/doc/src/notes.xml b/erts/doc/src/notes.xml
index e339b22e98..b909e18dc6 100644
--- a/erts/doc/src/notes.xml
+++ b/erts/doc/src/notes.xml
@@ -31,6 +31,21 @@
   </header>
   <p>This document describes the changes made to the ERTS application.</p>
 
+<section><title>Erts 9.3.3.6</title>
+
+    <section><title>Improvements and New Features</title>
+      <list>
+        <item>
+	    <p>List subtraction (The <c>--</c> operator) will now
+	    yield properly on large inputs.</p>
+          <p>
+	    Own Id: OTP-15371</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
 <section><title>Erts 9.3.3.5</title>
 
     <section><title>Fixed Bugs and Malfunctions</title>
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 252bf1cc7e..af1133b853 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -322,6 +322,7 @@ type	THR_PRGR_DATA	LONG_LIVED	SYSTEM		thr_prgr_data
 type	T_THR_PRGR_DATA	SHORT_LIVED	SYSTEM		temp_thr_prgr_data
 type	RELEASE_LAREA	SHORT_LIVED	SYSTEM		release_literal_area
 +endif
+type    LIST_TRAP       SHORT_LIVED     PROCESSES       list_bif_trap_state
 
 #
 # Types used for special emulators
diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c
index 73d327da3e..94a41c285a 100644
--- a/erts/emulator/beam/erl_bif_lists.c
+++ b/erts/emulator/beam/erl_bif_lists.c
@@ -29,12 +29,13 @@
 #include "sys.h"
 #include "erl_vm.h"
 #include "global.h"
-#include "erl_process.h"
-#include "error.h"
 #include "bif.h"
+#include "erl_binary.h"
+
 
 static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List);
 
+
 static BIF_RETTYPE append(Process* p, Eterm A, Eterm B)
 {
     Eterm list;
@@ -146,103 +147,725 @@ BIF_RETTYPE append_2(BIF_ALIST_2)
     return append(BIF_P, BIF_ARG_1, BIF_ARG_2);
 }
 
-/*
- * erlang:'--'/2
- */
+/* erlang:'--'/2
+ *
+ * Subtracts a list from another (LHS -- RHS), removing the first occurrence of
+ * each element in RHS from LHS. There is no type coercion so the elements must
+ * match exactly.
+ *
+ * The BIF is broken into several stages that can all trap individually, and it
+ * chooses its algorithm based on input size. If either input is small it will
+ * use a linear scan tuned to which side it's on, and if both inputs are large
+ * enough it will convert RHS into a multiset to provide good asymptotic
+ * behavior. */
 
-#define SMALL_VEC_SIZE 10
-static Eterm subtract(Process* p, Eterm A, Eterm B)
-{
-    Eterm  list;
-    Eterm* hp;
-    Uint  need;
-    Eterm  res;
-    Eterm small_vec[SMALL_VEC_SIZE];	/* Preallocated memory for small lists */
-    Eterm* vec_p;
-    Eterm* vp;
-    Sint i;
-    Sint n;
-    Sint m;
-    
-    if ((n = erts_list_length(A)) < 0) {
-	BIF_ERROR(p, BADARG);
+#define SUBTRACT_LHS_THRESHOLD 16
+#define SUBTRACT_RHS_THRESHOLD 16
+
+typedef enum {
+    SUBTRACT_STAGE_START,
+    SUBTRACT_STAGE_LEN_LHS,
+
+    /* Naive linear scan that's efficient when
+     * LEN_LHS <= SUBTRACT_LHS_THRESHOLD. */
+    SUBTRACT_STAGE_NAIVE_LHS,
+
+    SUBTRACT_STAGE_LEN_RHS,
+
+    /* As SUBTRACT_STAGE_NAIVE_LHS but for RHS. */
+    SUBTRACT_STAGE_NAIVE_RHS,
+
+    /* Creates a multiset from RHS for faster lookups before sweeping through
+     * LHS. The set is implemented as a red-black tree and duplicate elements
+     * are handled by a counter on each node. */
+    SUBTRACT_STAGE_SET_BUILD,
+    SUBTRACT_STAGE_SET_FINISH
+} ErtsSubtractCtxStage;
+
+typedef struct subtract_node__ {
+    struct subtract_node__ *parent;
+    struct subtract_node__ *left;
+    struct subtract_node__ *right;
+    int is_red;
+
+    Eterm key;
+    Uint count;
+} subtract_tree_t;
+
+typedef struct {
+    ErtsSubtractCtxStage stage;
+
+    Eterm lhs_original;
+    Eterm rhs_original;
+
+    Uint lhs_remaining;
+    Uint rhs_remaining;
+
+    Eterm iterator;
+
+    Eterm *result_cdr;
+    Eterm result;
+
+    union {
+        Eterm lhs_elements[SUBTRACT_LHS_THRESHOLD];
+        Eterm rhs_elements[SUBTRACT_RHS_THRESHOLD];
+
+        struct {
+            subtract_tree_t *tree;
+
+            /* A memory area for the tree's nodes, saving us the need to have
+             * one allocation per node. */
+            subtract_tree_t *alloc_start;
+            subtract_tree_t *alloc;
+        } rhs_set;
+    } u;
+} ErtsSubtractContext;
+
+#define ERTS_RBT_PREFIX subtract
+#define ERTS_RBT_T subtract_tree_t
+#define ERTS_RBT_KEY_T Eterm
+#define ERTS_RBT_FLAGS_T int
+#define ERTS_RBT_INIT_EMPTY_TNODE(T) \
+    do { \
+        (T)->parent = NULL; \
+        (T)->left = NULL; \
+        (T)->right = NULL; \
+    } while(0)
+#define ERTS_RBT_IS_RED(T) ((T)->is_red)
+#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1)
+#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T))
+#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0)
+#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red)
+#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F)
+#define ERTS_RBT_GET_PARENT(T) ((T)->parent)
+#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P)
+#define ERTS_RBT_GET_RIGHT(T) ((T)->right)
+#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R))
+#define ERTS_RBT_GET_LEFT(T) ((T)->left)
+#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L))
+#define ERTS_RBT_GET_KEY(T) ((T)->key)
+#define ERTS_RBT_IS_LT(KX, KY) (CMP_TERM(KX, KY) < 0)
+#define ERTS_RBT_IS_EQ(KX, KY) EQ(KX, KY)
+#define ERTS_RBT_WANT_LOOKUP_INSERT
+#define ERTS_RBT_WANT_LOOKUP
+#define ERTS_RBT_WANT_DELETE
+#define ERTS_RBT_UNDEF
+
+#include "erl_rbtree.h"
+
+static int subtract_continue(Process *p, ErtsSubtractContext *context);
+
+static void subtract_ctx_dtor(ErtsSubtractContext *context) {
+    switch (context->stage) {
+        case SUBTRACT_STAGE_SET_BUILD:
+        case SUBTRACT_STAGE_SET_FINISH:
+            erts_free(ERTS_ALC_T_LIST_TRAP, context->u.rhs_set.alloc_start);
+            break;
+        default:
+            break;
     }
-    if ((m = erts_list_length(B)) < 0) {
-	BIF_ERROR(p, BADARG);
+}
+
+static int subtract_ctx_bin_dtor(Binary *context_bin) {
+    ErtsSubtractContext *context = ERTS_MAGIC_BIN_DATA(context_bin);
+    subtract_ctx_dtor(context);
+    return 1;
+}
+
+static void subtract_ctx_move(ErtsSubtractContext *from,
+                              ErtsSubtractContext *to) {
+    int uses_result_cdr = 0;
+
+    to->stage = from->stage;
+
+    to->lhs_original = from->lhs_original;
+    to->rhs_original = from->rhs_original;
+
+    to->lhs_remaining = from->lhs_remaining;
+    to->rhs_remaining = from->rhs_remaining;
+
+    to->iterator = from->iterator;
+    to->result = from->result;
+
+    switch (to->stage) {
+        case SUBTRACT_STAGE_NAIVE_LHS:
+            sys_memcpy(to->u.lhs_elements,
+                       from->u.lhs_elements,
+                       sizeof(Eterm) * to->lhs_remaining);
+            break;
+        case SUBTRACT_STAGE_NAIVE_RHS:
+            sys_memcpy(to->u.rhs_elements,
+                       from->u.rhs_elements,
+                       sizeof(Eterm) * to->rhs_remaining);
+
+            uses_result_cdr = 1;
+            break;
+        case SUBTRACT_STAGE_SET_FINISH:
+            uses_result_cdr = 1;
+            /* FALL THROUGH */
+        case SUBTRACT_STAGE_SET_BUILD:
+            to->u.rhs_set.alloc_start = from->u.rhs_set.alloc_start;
+            to->u.rhs_set.alloc = from->u.rhs_set.alloc;
+            to->u.rhs_set.tree = from->u.rhs_set.tree;
+            break;
+        default:
+            break;
     }
-    
-    if (n == 0)
-	BIF_RET(NIL);
-    if (m == 0)
-	BIF_RET(A);
-    
-    /* allocate element vector */
-    if (n <= SMALL_VEC_SIZE)
-	vec_p = small_vec;
-    else
-	vec_p = (Eterm*) erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm));
-    
-    /* PUT ALL ELEMENTS IN VP */
-    vp = vec_p;
-    list = A;
-    i = n;
-    while(i--) {
-	Eterm* listp = list_val(list);
-	*vp++ = CAR(listp);
-	list = CDR(listp);
+
+    if (uses_result_cdr) {
+        if (from->result_cdr == &from->result) {
+            to->result_cdr = &to->result;
+        } else {
+            to->result_cdr = from->result_cdr;
+        }
     }
-    
-    /* UNMARK ALL DELETED CELLS */
-    list = B;
-    m = 0;  /* number of deleted elements */
-    while(is_list(list)) {
-	Eterm* listp = list_val(list);
-	Eterm  elem = CAR(listp);
-	i = n;
-	vp = vec_p;
-	while(i--) {
-	    if (is_value(*vp) && eq(*vp, elem)) {
-		*vp = THE_NON_VALUE;
-		m++;
-		break;
-	    }
-	    vp++;
-	}
-	list = CDR(listp);
+}
+
+static Eterm subtract_create_trap_state(Process *p,
+                                        ErtsSubtractContext *context) {
+    Binary *state_bin;
+    Eterm *hp;
+
+    state_bin = erts_create_magic_binary(sizeof(ErtsSubtractContext),
+                                         subtract_ctx_bin_dtor);
+
+    subtract_ctx_move(context, ERTS_MAGIC_BIN_DATA(state_bin));
+
+    hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+
+    return erts_mk_magic_ref(&hp, &MSO(p), state_bin);
+}
+
+static int subtract_enter_len_lhs(Process *p, ErtsSubtractContext *context) {
+    context->stage = SUBTRACT_STAGE_LEN_LHS;
+
+    context->iterator = context->lhs_original;
+    context->lhs_remaining = 0;
+
+    return subtract_continue(p, context);
+}
+
+static int subtract_enter_len_rhs(Process *p, ErtsSubtractContext *context) {
+    context->stage = SUBTRACT_STAGE_LEN_RHS;
+
+    context->iterator = context->rhs_original;
+    context->rhs_remaining = 0;
+
+    return subtract_continue(p, context);
+}
+
+static int subtract_get_length(Process *p, Eterm *iterator_p, Uint *count_p) {
+    static const Sint ELEMENTS_PER_RED = 32;
+
+    Sint budget, count;
+    Eterm iterator;
+
+    budget = ELEMENTS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    iterator = *iterator_p;
+
+#ifdef DEBUG
+    budget = budget / 10 + 1;
+#endif
+
+    for (count = 0; count < budget && is_list(iterator); count++) {
+        iterator = CDR(list_val(iterator));
     }
-    
-    if (m == n)      /* All deleted ? */
-	res = NIL;
-    else if (m == 0)  /* None deleted ? */
-	res = A;
-    else {			/* REBUILD LIST */
-	res = NIL;
-	need = 2*(n - m);
-	hp = HAlloc(p, need);
-	vp = vec_p + n - 1;
-	while(vp >= vec_p) {
-	    if (is_value(*vp)) {
-		res = CONS(hp, *vp, res);
-		hp += 2;
-	    }
-	    vp--;
-	}
+
+    if (!is_list(iterator) && !is_nil(iterator)) {
+        return -1;
+    }
+
+    BUMP_REDS(p, count / ELEMENTS_PER_RED);
+
+    *iterator_p = iterator;
+    *count_p += count;
+
+    if (is_nil(iterator)) {
+        return 1;
     }
-    if (vec_p != small_vec)
-	erts_free(ERTS_ALC_T_TMP, (void *) vec_p);
-    BIF_RET(res);
+
+    return 0;
 }
 
-BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2)
-{
-    return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2);
+static int subtract_enter_naive_lhs(Process *p, ErtsSubtractContext *context) {
+    Eterm iterator;
+    int i = 0;
+
+    context->stage = SUBTRACT_STAGE_NAIVE_LHS;
+
+    context->iterator = context->rhs_original;
+    context->result = NIL;
+
+    iterator = context->lhs_original;
+
+    while (is_list(iterator)) {
+        const Eterm *cell = list_val(iterator);
+
+        ASSERT(i < SUBTRACT_LHS_THRESHOLD);
+
+        context->u.lhs_elements[i++] = CAR(cell);
+        iterator = CDR(cell);
+    }
+
+    ASSERT(i == context->lhs_remaining);
+
+    return subtract_continue(p, context);
 }
 
-BIF_RETTYPE subtract_2(BIF_ALIST_2)
-{
-    return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2);
+static int subtract_naive_lhs(Process *p, ErtsSubtractContext *context) {
+    const Sint CHECKS_PER_RED = 16;
+    Sint checks, budget;
+
+    budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    checks = 0;
+
+    while (checks < budget && is_list(context->iterator)) {
+        const Eterm *cell;
+        Eterm value, next;
+        int found_at;
+
+        cell = list_val(context->iterator);
+
+        value = CAR(cell);
+        next = CDR(cell);
+
+        for (found_at = 0; found_at < context->lhs_remaining; found_at++) {
+            if (EQ(value, context->u.lhs_elements[found_at])) {
+                /* We shift the array one step down as we have to preserve
+                 * order.
+                 *
+                 * Note that we can't exit early as that would suppress errors
+                 * in the right-hand side (this runs prior to determining the
+                 * length of RHS). */
+
+                context->lhs_remaining--;
+                sys_memmove(&context->u.lhs_elements[found_at],
+                            &context->u.lhs_elements[found_at + 1],
+                            (context->lhs_remaining - found_at) * sizeof(Eterm));
+                break;
+            }
+        }
+
+        checks += MAX(1, context->lhs_remaining);
+        context->iterator = next;
+    }
+
+    BUMP_REDS(p, MIN(checks, budget) / CHECKS_PER_RED);
+
+    if (is_list(context->iterator)) {
+        return 0;
+    } else if (!is_nil(context->iterator)) {
+        return -1;
+    }
+
+    if (context->lhs_remaining > 0) {
+        Eterm *hp;
+        int i;
+
+        hp = HAlloc(p, context->lhs_remaining * 2);
+
+        for (i = context->lhs_remaining - 1; i >= 0; i--) {
+            Eterm value = context->u.lhs_elements[i];
+
+            context->result = CONS(hp, value, context->result);
+            hp += 2;
+        }
+    }
+
+    ASSERT(context->lhs_remaining > 0 || context->result == NIL);
+
+    return 1;
+}
+
+static int subtract_enter_naive_rhs(Process *p, ErtsSubtractContext *context) {
+    Eterm iterator;
+    int i = 0;
+
+    context->stage = SUBTRACT_STAGE_NAIVE_RHS;
+
+    context->iterator = context->lhs_original;
+    context->result_cdr = &context->result;
+    context->result = NIL;
+
+    iterator = context->rhs_original;
+
+    while (is_list(iterator)) {
+        const Eterm *cell = list_val(iterator);
+
+        ASSERT(i < SUBTRACT_RHS_THRESHOLD);
+
+        context->u.rhs_elements[i++] = CAR(cell);
+        iterator = CDR(cell);
+    }
+
+    ASSERT(i == context->rhs_remaining);
+
+    return subtract_continue(p, context);
+}
+
+static int subtract_naive_rhs(Process *p, ErtsSubtractContext *context) {
+    /* Sweeps LHS, dropping one element per match in the small RHS array.
+     * Returns 1 when the result is complete and 0 when it must yield. */
+    const Sint CHECKS_PER_RED = 16;
+    Sint checks, budget;
+
+    budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    checks = 0;
+
+#ifdef DEBUG
+    budget = budget / 10 + 1;
+#endif
+
+    while (checks < budget && is_list(context->iterator)) {
+        const Eterm *cell;
+        Eterm value, next;
+        int found_at;
+
+        cell = list_val(context->iterator);
+        value = CAR(cell);
+        next = CDR(cell);
+
+        for (found_at = context->rhs_remaining - 1; found_at >= 0; found_at--) {
+            if (EQ(value, context->u.rhs_elements[found_at])) {
+                break;
+            }
+        }
+
+        if (found_at < 0) {
+            /* Destructively add the value to the result. This is safe
+             * since the GC is disabled and the unfinished term is never
+             * leaked to the outside world. */
+            Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2);
+            *context->result_cdr = make_list(hp);
+            context->result_cdr = &CDR(hp);
+            CAR(hp) = value;
+        } else {
+            if (context->rhs_remaining-- == 1) {
+                /* We've run out of items to remove, so the rest of the
+                 * result will be equal to the remainder of the input. We know
+                 * that LHS is well-formed as any errors would've been reported
+                 * during length determination. */
+                *context->result_cdr = next;
+
+                BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED);
+                return 1;
+            }
+
+            /* RHS order is irrelevant; plug the hole with the last element. */
+            context->u.rhs_elements[found_at] =
+                context->u.rhs_elements[context->rhs_remaining];
+        }
+
+        /* Charge at least one check per element: RHS may be empty, and the
+         * loop would never yield if the counter stood still. */
+        checks += MAX(1, context->rhs_remaining);
+        context->iterator = next;
+        context->lhs_remaining--;
+    }
+
+    /* The result only has to be terminated when returning it to the user, but
+     * we're doing it when trapping as well to prevent headaches when
+     * debugging. */
+    *context->result_cdr = NIL;
+
+    BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED);
+
+    if (is_list(context->iterator)) {
+        ASSERT(context->lhs_remaining > 0);
+        return 0;
+    }
+
+    return 1;
+}
+
+static int subtract_enter_set_build(Process *p, ErtsSubtractContext *context) {
+    context->stage = SUBTRACT_STAGE_SET_BUILD;
+
+    context->u.rhs_set.alloc_start =
+        erts_alloc(ERTS_ALC_T_LIST_TRAP,
+                   context->rhs_remaining * sizeof(subtract_tree_t));
+
+    context->u.rhs_set.alloc = context->u.rhs_set.alloc_start;
+    context->u.rhs_set.tree = NULL;
+
+    context->iterator = context->rhs_original;
+
+    return subtract_continue(p, context);
+}
+
+static int subtract_set_build(Process *p, ErtsSubtractContext *context) {
+    const static Sint INSERTIONS_PER_RED = 16;
+    Sint budget, insertions;
+
+    budget = INSERTIONS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    insertions = 0;
+
+#ifdef DEBUG
+    budget = budget / 10 + 1;
+#endif
+
+    while (insertions < budget && is_list(context->iterator)) {
+        subtract_tree_t *existing_node, *new_node;
+        const Eterm *cell;
+        Eterm value, next;
+
+        cell = list_val(context->iterator);
+        value = CAR(cell);
+        next = CDR(cell);
+
+        new_node = context->u.rhs_set.alloc;
+        new_node->key = value;
+        new_node->count = 1;
+
+        existing_node = subtract_rbt_lookup_insert(&context->u.rhs_set.tree,
+                                                   new_node);
+
+        if (existing_node != NULL) {
+            existing_node->count++;
+        } else {
+            context->u.rhs_set.alloc++;
+        }
+
+        context->iterator = next;
+        insertions++;
+    }
+
+    BUMP_REDS(p, insertions / INSERTIONS_PER_RED);
+
+    ASSERT(is_list(context->iterator) || is_nil(context->iterator));
+    ASSERT(context->u.rhs_set.tree != NULL);
+
+    return is_nil(context->iterator);
+}
+
+static int subtract_enter_set_finish(Process *p, ErtsSubtractContext *context) {
+    context->stage = SUBTRACT_STAGE_SET_FINISH;
+
+    context->result_cdr = &context->result;
+    context->result = NIL;
+
+    context->iterator = context->lhs_original;
+
+    return subtract_continue(p, context);
+}
+
+static int subtract_set_finish(Process *p, ErtsSubtractContext *context) {
+    const Sint CHECKS_PER_RED = 8;
+    Sint checks, budget;
+
+    budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    checks = 0;
+
+#ifdef DEBUG
+    budget = budget / 10 + 1;
+#endif
+
+    while (checks < budget && is_list(context->iterator)) {
+        subtract_tree_t *node;
+        const Eterm *cell;
+        Eterm value, next;
+
+        cell = list_val(context->iterator);
+        value = CAR(cell);
+        next = CDR(cell);
+
+        ASSERT(context->rhs_remaining > 0);
+
+        node = subtract_rbt_lookup(context->u.rhs_set.tree, value);
+
+        if (node == NULL) {
+            Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2);
+
+            *context->result_cdr = make_list(hp);
+            context->result_cdr = &CDR(hp);
+
+            CAR(hp) = value;
+        } else {
+            if (context->rhs_remaining-- == 1) {
+                *context->result_cdr = next;
+
+                BUMP_REDS(p, checks / CHECKS_PER_RED);
+
+                return 1;
+            }
+
+            if (node->count-- == 1) {
+                subtract_rbt_delete(&context->u.rhs_set.tree, node);
+            }
+        }
+
+        context->iterator = next;
+        context->lhs_remaining--;
+        checks++;
+    }
+
+    *context->result_cdr = NIL;
+
+    BUMP_REDS(p, checks / CHECKS_PER_RED);
+
+    if (is_list(context->iterator)) {
+        ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0);
+        return 0;
+    }
+
+    return 1;
+}
+
+static int subtract_continue(Process *p, ErtsSubtractContext *context) {
+    switch (context->stage) {
+        case SUBTRACT_STAGE_START: {
+            return subtract_enter_len_lhs(p, context);
+        }
+
+        case SUBTRACT_STAGE_LEN_LHS: {
+            int res = subtract_get_length(p,
+                                          &context->iterator,
+                                          &context->lhs_remaining);
+
+            if (res != 1) {
+                return res;
+            }
+
+            if (context->lhs_remaining <= SUBTRACT_LHS_THRESHOLD) {
+                return subtract_enter_naive_lhs(p, context);
+            }
+
+            return subtract_enter_len_rhs(p, context);
+        }
+
+        case SUBTRACT_STAGE_NAIVE_LHS: {
+            return subtract_naive_lhs(p, context);
+        }
+
+        case SUBTRACT_STAGE_LEN_RHS: {
+            int res = subtract_get_length(p,
+                                          &context->iterator,
+                                          &context->rhs_remaining);
+
+            if (res != 1) {
+                return res;
+            }
+
+            /* We've walked through both lists fully now so we no longer need
+             * to check for errors past this point. */
+
+            if (context->rhs_remaining <= SUBTRACT_RHS_THRESHOLD) {
+                return subtract_enter_naive_rhs(p, context);
+            }
+
+            return subtract_enter_set_build(p, context);
+        }
+
+        case SUBTRACT_STAGE_NAIVE_RHS: {
+            return subtract_naive_rhs(p, context);
+        }
+
+        case SUBTRACT_STAGE_SET_BUILD: {
+            int res = subtract_set_build(p, context);
+
+            if (res != 1) {
+                return res;
+            }
+
+            return subtract_enter_set_finish(p, context);
+        }
+
+        case SUBTRACT_STAGE_SET_FINISH: {
+            return subtract_set_finish(p, context);
+        }
+
+        default:
+            ERTS_ASSERT(!"unreachable");
+    }
+}
+
+static int subtract_start(Process *p, Eterm lhs, Eterm rhs,
+                          ErtsSubtractContext *context) {
+    context->stage = SUBTRACT_STAGE_START;
+
+    context->lhs_original = lhs;
+    context->rhs_original = rhs;
+
+    return subtract_continue(p, context);
 }
 
+/* erlang:'--'/2 */
+static Eterm subtract(Export *bif_entry, BIF_ALIST_2) {
+    Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2;
+
+    if ((is_list(lhs) || is_nil(lhs)) && (is_list(rhs) || is_nil(rhs))) {
+        /* We start with the context on the stack in the hopes that we won't
+         * have to trap. */
+        ErtsSubtractContext context;
+        int res;
+
+        res = subtract_start(BIF_P, lhs, rhs, &context);
+
+        if (res == 0) {
+            Eterm state_mref;
+
+            state_mref = subtract_create_trap_state(BIF_P, &context);
+            erts_set_gc_state(BIF_P, 0);
+
+            BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL);
+        }
+
+        subtract_ctx_dtor(&context);
+
+        if (res < 0) {
+            BIF_ERROR(BIF_P, BADARG);
+        }
+
+        BIF_RET(context.result);
+    } else if (is_internal_magic_ref(lhs)) {
+        ErtsSubtractContext *context;
+        int (*dtor)(Binary*);
+        Binary *magic_bin;
+
+        int res;
+
+        magic_bin = erts_magic_ref2bin(lhs);
+        dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin);
+
+        if (dtor != subtract_ctx_bin_dtor) {
+            BIF_ERROR(BIF_P, BADARG);
+        }
+
+        ASSERT(BIF_P->flags & F_DISABLE_GC);
+        ASSERT(rhs == NIL);
+
+        context = ERTS_MAGIC_BIN_DATA(magic_bin);
+        res = subtract_continue(BIF_P, context);
+
+        if (res == 0) {
+            BIF_TRAP2(bif_entry, BIF_P, lhs, NIL);
+        }
+
+        erts_set_gc_state(BIF_P, 1);
+
+        if (res < 0) {
+            ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry,
+                                    context->lhs_original,
+                                    context->rhs_original);
+        }
+
+        BIF_RET(context->result);
+    }
+
+    ASSERT(!(BIF_P->flags & F_DISABLE_GC));
+
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) {
+    return subtract(bif_export[BIF_ebif_minusminus_2], BIF_CALL_ARGS);
+}
+
+BIF_RETTYPE subtract_2(BIF_ALIST_2) {
+    return subtract(bif_export[BIF_subtract_2], BIF_CALL_ARGS);
+}
+
+
 BIF_RETTYPE lists_member_2(BIF_ALIST_2)
 {
     Eterm term;
diff --git a/erts/vsn.mk b/erts/vsn.mk
index b8ff161df9..71f4d0bec9 100644
--- a/erts/vsn.mk
+++ b/erts/vsn.mk
@@ -18,7 +18,7 @@
 # %CopyrightEnd%
 # 
 
-VSN = 9.3.3.5
+VSN = 9.3.3.6
 
 # Port number 4365 in 4.2
 # Port number 4366 in 4.3
diff --git a/lib/ssl/doc/src/notes.xml b/lib/ssl/doc/src/notes.xml
index b2a774adf0..caa1110307 100644
--- a/lib/ssl/doc/src/notes.xml
+++ b/lib/ssl/doc/src/notes.xml
@@ -27,6 +27,38 @@
   </header>
   <p>This document describes the changes made to the SSL application.</p>
 
+<section><title>SSL 8.2.6.4</title>
+
+    <section><title>Fixed Bugs and Malfunctions</title>
+      <list>
+        <item>
+          <p>
+	    Add engine support for RSA key exchange</p>
+          <p>
+	    Own Id: OTP-15420</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+<section><title>SSL 8.2.6.3</title>
+
+    <section><title>Fixed Bugs and Malfunctions</title>
+      <list>
+        <item>
+          <p>
+	    Extend check for undelivered data at closing; it could under
+	    some circumstances fail to deliver all data that was
+	    actually received.</p>
+          <p>
+	    Own Id: OTP-15412</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
 <section><title>SSL 8.2.6.2</title>
 
     <section><title>Fixed Bugs and Malfunctions</title>
diff --git a/lib/ssl/src/ssl_handshake.erl b/lib/ssl/src/ssl_handshake.erl
index cd601c04c0..c5a87e28bc 100644
--- a/lib/ssl/src/ssl_handshake.erl
+++ b/lib/ssl/src/ssl_handshake.erl
@@ -898,6 +898,13 @@ premaster_secret(EncSecret, #'RSAPrivateKey'{} = RSAPrivateKey) ->
     catch
 	_:_ ->
 	    throw(?ALERT_REC(?FATAL, ?DECRYPT_ERROR))
+    end;
+premaster_secret(EncSecret, #{algorithm := rsa} = Engine) ->
+    try crypto:private_decrypt(rsa, EncSecret, maps:remove(algorithm, Engine),
+				   [{rsa_pad, rsa_pkcs1_padding}])
+    catch
+	_:_ ->
+	    throw(?ALERT_REC(?FATAL, ?DECRYPT_ERROR))
     end.
 %%====================================================================
 %% Extensions handling
diff --git a/lib/ssl/src/tls_connection.erl b/lib/ssl/src/tls_connection.erl
index 914ee9f22f..d3b3902fea 100644
--- a/lib/ssl/src/tls_connection.erl
+++ b/lib/ssl/src/tls_connection.erl
@@ -676,6 +676,7 @@ handle_info({CloseTag, Socket}, StateName,
             #state{socket = Socket, close_tag = CloseTag,
                    socket_options = #socket_options{active = Active},
                    protocol_buffers = #protocol_buffers{tls_cipher_texts = CTs},
+                   user_data_buffer = Buffer,
 		   negotiated_version = Version} = State) ->
 
     %% Note that as of TLS 1.1,
@@ -683,7 +684,7 @@ handle_info({CloseTag, Socket}, StateName,
     %% session not be resumed.  This is a change from TLS 1.0 to conform
     %% with widespread implementation practice.
 
-    case (Active == false) andalso (CTs =/= []) of
+    case (Active == false) andalso ((CTs =/= []) or (Buffer =/= <<>>)) of
         false ->
             case Version of
                 {1, N} when N >= 1 ->
diff --git a/lib/ssl/test/ssl_engine_SUITE.erl b/lib/ssl/test/ssl_engine_SUITE.erl
index 8025e4e0ed..c348fa0a9c 100644
--- a/lib/ssl/test/ssl_engine_SUITE.erl
+++ b/lib/ssl/test/ssl_engine_SUITE.erl
@@ -90,12 +90,14 @@ end_per_testcase(_TestCase, Config) ->
 private_key(Config) when is_list(Config) ->
     ClientFileBase = filename:join([proplists:get_value(priv_dir, Config), "client_engine"]),
     ServerFileBase = filename:join([proplists:get_value(priv_dir, Config), "server_engine"]),
+    Ext = x509_test:extensions([{key_usage, [digitalSignature, keyEncipherment]}]),
     #{server_config := ServerConf,
       client_config := ClientConf} = GenCertData =
         public_key:pkix_test_data(#{server_chain => 
                                         #{root => [{key, ssl_test_lib:hardcode_rsa_key(1)}],
                                           intermediates => [[{key, ssl_test_lib:hardcode_rsa_key(2)}]],
-                                          peer => [{key, ssl_test_lib:hardcode_rsa_key(3)}
+                                          peer => [{extensions, Ext},
+                                                   {key, ssl_test_lib:hardcode_rsa_key(3)}
                                                   ]},
                                     client_chain => 
                                         #{root => [{key, ssl_test_lib:hardcode_rsa_key(4)}],
@@ -131,6 +133,12 @@ private_key(Config) when is_list(Config) ->
     %% Test with engine
     test_tls_connection(EngineServerConf, EngineClientConf, Config),
     
+    %% Test with engine and rsa keyexchange
+    RSASuites = all_kex_rsa_suites([{tls_version, 'tlsv1.2'} | Config]),
+    
+    test_tls_connection([{ciphers, RSASuites}, {versions, ['tlsv1.2']} | EngineServerConf], 
+                        [{ciphers, RSASuites}, {versions, ['tlsv1.2']} | EngineClientConf], Config),
+    
     %% Test with engine and present file arugments
     test_tls_connection(EngineFileServerConf, EngineFileClientConf, Config),
     
@@ -160,3 +168,8 @@ test_tls_connection(ServerConf, ClientConf, Config) ->
     ssl_test_lib:check_result(Server, ok, Client, ok),
     ssl_test_lib:close(Server),
     ssl_test_lib:close(Client).
+
+all_kex_rsa_suites(Config) ->
+    Version = proplists:get_value(tls_version, Config),
+    All = ssl:cipher_suites(all, Version),
+    ssl:filter_cipher_suites(All,[{key_exchange, fun(rsa) -> true;(_) -> false end}]).
diff --git a/lib/ssl/vsn.mk b/lib/ssl/vsn.mk
index b46c1334cf..1280efa77c 100644
--- a/lib/ssl/vsn.mk
+++ b/lib/ssl/vsn.mk
@@ -1 +1 @@
-SSL_VSN = 8.2.6.2
+SSL_VSN = 8.2.6.4
diff --git a/lib/stdlib/doc/src/lists.xml b/lib/stdlib/doc/src/lists.xml
index 7efafedc82..55227aaee5 100644
--- a/lib/stdlib/doc/src/lists.xml
+++ b/lib/stdlib/doc/src/lists.xml
@@ -838,14 +838,6 @@ splitwith(Pred, List) ->
 > <input>lists:subtract("123212", "212").</input>
 "312".</pre>
         <p><c>lists:subtract(A, B)</c> is equivalent to <c>A -- B</c>.</p>
-        <warning>
-          <p>The complexity of <c>lists:subtract(A, B)</c> is proportional to
-            <c>length(A)*length(B)</c>, meaning that it is very slow if both
-            <c>A</c> and <c>B</c> are long lists. (If both lists are long, it
-            is a much better choice to use ordered lists and
-            <seealso marker="ordsets#subtract/2">
-            <c>ordsets:subtract/2</c></seealso>.</p>
-        </warning>
       </desc>
     </func>
 
diff --git a/lib/stdlib/doc/src/notes.xml b/lib/stdlib/doc/src/notes.xml
index e26c4aba74..3c7c8bf400 100644
--- a/lib/stdlib/doc/src/notes.xml
+++ b/lib/stdlib/doc/src/notes.xml
@@ -31,6 +31,21 @@
   </header>
   <p>This document describes the changes made to the STDLIB application.</p>
 
+<section><title>STDLIB 3.4.5.1</title>
+
+    <section><title>Improvements and New Features</title>
+      <list>
+        <item>
+	    <p>List subtraction (The <c>--</c> operator) will now
+	    yield properly on large inputs.</p>
+          <p>
+	    Own Id: OTP-15371</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
 <section><title>STDLIB 3.4.5</title>
 
     <section><title>Fixed Bugs and Malfunctions</title>
diff --git a/lib/stdlib/test/lists_SUITE.erl b/lib/stdlib/test/lists_SUITE.erl
index 7c99244b36..c380b3bba1 100644
--- a/lib/stdlib/test/lists_SUITE.erl
+++ b/lib/stdlib/test/lists_SUITE.erl
@@ -2597,6 +2597,13 @@ subtract(Config) when is_list(Config) ->
     {'EXIT',_} = (catch sub([a|b], [])),
     {'EXIT',_} = (catch sub([a|b], [a])),
 
+    %% Trapping, both crashing and otherwise.
+    [sub_trapping(N) || N <- lists:seq(0, 18)],
+
+    %% The current implementation chooses which algorithm to use based on
+    %% certain thresholds, and we need proper coverage for all corner cases.
+    [sub_thresholds(N) || N <- lists:seq(0, 32)],
+
     ok.
 
 sub_non_matching(A, B) ->
@@ -2606,6 +2613,41 @@ sub(A, B) ->
     Res = A -- B,
     Res = lists:subtract(A, B).
 
+sub_trapping(N) ->
+    List = lists:duplicate(N + (1 bsl N), gurka),
+    ImproperList = List ++ crash,
+
+    {'EXIT',_} = (catch sub_trapping_1(ImproperList, [])),
+    {'EXIT',_} = (catch sub_trapping_1(List, ImproperList)),
+
+    List = List -- lists:duplicate(N + (1 bsl N), gaffel),
+    ok = sub_trapping_1(List, []).
+
+sub_trapping_1([], _) -> ok;
+sub_trapping_1(L, R) -> sub_trapping_1(L -- R, [gurka | R]).
+
+sub_thresholds(N) ->
+    %% This needs to be long enough to cause trapping.
+    OtherLen = 1 bsl 18,
+    Other = lists:seq(0, OtherLen - 1),
+
+    Disjoint = lists:seq(-N, -1),
+    Subset = lists:seq(1, N),
+
+    %% LHS is disjoint from RHS, so all elements must be retained.
+    Disjoint = Disjoint -- Other,
+
+    %% LHS is covered by RHS, so all elements must be removed.
+    [] = Subset -- Other,
+
+    %% RHS is disjoint from LHS, so all elements must be retained.
+    Other = Other -- Disjoint,
+
+    %% RHS is covered by LHS, so N elements must be removed.
+    N = OtherLen - length(Other -- Subset),
+
+    ok.
+
 %% Test lists:droplast/1
 droplast(Config) when is_list(Config) ->
     [] = lists:droplast([x]),
diff --git a/lib/stdlib/vsn.mk b/lib/stdlib/vsn.mk
index 09a4d6fb50..34ba8c083d 100644
--- a/lib/stdlib/vsn.mk
+++ b/lib/stdlib/vsn.mk
@@ -1 +1 @@
-STDLIB_VSN = 3.4.5
+STDLIB_VSN = 3.4.5.1
diff --git a/make/otp_version_tickets b/make/otp_version_tickets
index 577b53efa7..2cba13f734 100644
--- a/make/otp_version_tickets
+++ b/make/otp_version_tickets
@@ -1 +1 @@
-OTP-15399
+OTP-15420
diff --git a/otp_versions.table b/otp_versions.table
index a66dbc71cf..be80671a41 100644
--- a/otp_versions.table
+++ b/otp_versions.table
@@ -1,3 +1,5 @@
+OTP-20.3.8.13 : ssl-8.2.6.4 # asn1-5.0.5.1 common_test-1.15.4 compiler-7.1.5.2 cosEvent-2.2.2 cosEventDomain-1.2.2 cosFileTransfer-1.2.2 cosNotification-1.2.3 cosProperty-1.2.3 cosTime-1.2.3 cosTransactions-1.3.3 crypto-4.2.2.2 debugger-4.2.4 dialyzer-3.2.4 diameter-2.1.4 edoc-0.9.2 eldap-1.2.3.1 erl_docgen-0.7.3 erl_interface-3.10.2.1 erts-9.3.3.6 et-1.6.1 eunit-2.3.5 hipe-3.17.1 ic-4.4.4.2 inets-6.5.2.4 jinterface-1.8.1 kernel-5.4.3.2 megaco-3.18.3 mnesia-4.15.3.2 observer-2.7 odbc-2.12.1 orber-3.8.4 os_mon-2.4.4 otp_mibs-1.1.2 parsetools-2.1.6 public_key-1.5.2 reltool-0.7.5 runtime_tools-1.12.5 sasl-3.1.2 snmp-5.2.11 ssh-4.6.9.1 stdlib-3.4.5.1 syntax_tools-2.1.4.1 tools-2.11.2 wx-1.8.3 xmerl-1.3.16 :
+OTP-20.3.8.12 : erts-9.3.3.6 ssl-8.2.6.3 stdlib-3.4.5.1 # asn1-5.0.5.1 common_test-1.15.4 compiler-7.1.5.2 cosEvent-2.2.2 cosEventDomain-1.2.2 cosFileTransfer-1.2.2 cosNotification-1.2.3 cosProperty-1.2.3 cosTime-1.2.3 cosTransactions-1.3.3 crypto-4.2.2.2 debugger-4.2.4 dialyzer-3.2.4 diameter-2.1.4 edoc-0.9.2 eldap-1.2.3.1 erl_docgen-0.7.3 erl_interface-3.10.2.1 et-1.6.1 eunit-2.3.5 hipe-3.17.1 ic-4.4.4.2 inets-6.5.2.4 jinterface-1.8.1 kernel-5.4.3.2 megaco-3.18.3 mnesia-4.15.3.2 observer-2.7 odbc-2.12.1 orber-3.8.4 os_mon-2.4.4 otp_mibs-1.1.2 parsetools-2.1.6 public_key-1.5.2 reltool-0.7.5 runtime_tools-1.12.5 sasl-3.1.2 snmp-5.2.11 ssh-4.6.9.1 syntax_tools-2.1.4.1 tools-2.11.2 wx-1.8.3 xmerl-1.3.16 :
 OTP-20.3.8.11 : erts-9.3.3.5 # asn1-5.0.5.1 common_test-1.15.4 compiler-7.1.5.2 cosEvent-2.2.2 cosEventDomain-1.2.2 cosFileTransfer-1.2.2 cosNotification-1.2.3 cosProperty-1.2.3 cosTime-1.2.3 cosTransactions-1.3.3 crypto-4.2.2.2 debugger-4.2.4 dialyzer-3.2.4 diameter-2.1.4 edoc-0.9.2 eldap-1.2.3.1 erl_docgen-0.7.3 erl_interface-3.10.2.1 et-1.6.1 eunit-2.3.5 hipe-3.17.1 ic-4.4.4.2 inets-6.5.2.4 jinterface-1.8.1 kernel-5.4.3.2 megaco-3.18.3 mnesia-4.15.3.2 observer-2.7 odbc-2.12.1 orber-3.8.4 os_mon-2.4.4 otp_mibs-1.1.2 parsetools-2.1.6 public_key-1.5.2 reltool-0.7.5 runtime_tools-1.12.5 sasl-3.1.2 snmp-5.2.11 ssh-4.6.9.1 ssl-8.2.6.2 stdlib-3.4.5 syntax_tools-2.1.4.1 tools-2.11.2 wx-1.8.3 xmerl-1.3.16 :
 OTP-20.3.8.10 : eldap-1.2.3.1 erts-9.3.3.4 # asn1-5.0.5.1 common_test-1.15.4 compiler-7.1.5.2 cosEvent-2.2.2 cosEventDomain-1.2.2 cosFileTransfer-1.2.2 cosNotification-1.2.3 cosProperty-1.2.3 cosTime-1.2.3 cosTransactions-1.3.3 crypto-4.2.2.2 debugger-4.2.4 dialyzer-3.2.4 diameter-2.1.4 edoc-0.9.2 erl_docgen-0.7.3 erl_interface-3.10.2.1 et-1.6.1 eunit-2.3.5 hipe-3.17.1 ic-4.4.4.2 inets-6.5.2.4 jinterface-1.8.1 kernel-5.4.3.2 megaco-3.18.3 mnesia-4.15.3.2 observer-2.7 odbc-2.12.1 orber-3.8.4 os_mon-2.4.4 otp_mibs-1.1.2 parsetools-2.1.6 public_key-1.5.2 reltool-0.7.5 runtime_tools-1.12.5 sasl-3.1.2 snmp-5.2.11 ssh-4.6.9.1 ssl-8.2.6.2 stdlib-3.4.5 syntax_tools-2.1.4.1 tools-2.11.2 wx-1.8.3 xmerl-1.3.16 :
 OTP-20.3.8.9 : compiler-7.1.5.2 # asn1-5.0.5.1 common_test-1.15.4 cosEvent-2.2.2 cosEventDomain-1.2.2 cosFileTransfer-1.2.2 cosNotification-1.2.3 cosProperty-1.2.3 cosTime-1.2.3 cosTransactions-1.3.3 crypto-4.2.2.2 debugger-4.2.4 dialyzer-3.2.4 diameter-2.1.4 edoc-0.9.2 eldap-1.2.3 erl_docgen-0.7.3 erl_interface-3.10.2.1 erts-9.3.3.3 et-1.6.1 eunit-2.3.5 hipe-3.17.1 ic-4.4.4.2 inets-6.5.2.4 jinterface-1.8.1 kernel-5.4.3.2 megaco-3.18.3 mnesia-4.15.3.2 observer-2.7 odbc-2.12.1 orber-3.8.4 os_mon-2.4.4 otp_mibs-1.1.2 parsetools-2.1.6 public_key-1.5.2 reltool-0.7.5 runtime_tools-1.12.5 sasl-3.1.2 snmp-5.2.11 ssh-4.6.9.1 ssl-8.2.6.2 stdlib-3.4.5 syntax_tools-2.1.4.1 tools-2.11.2 wx-1.8.3 xmerl-1.3.16 :
diff --git a/system/doc/efficiency_guide/commoncaveats.xml b/system/doc/efficiency_guide/commoncaveats.xml
index b41ffc3902..367da09ba3 100644
--- a/system/doc/efficiency_guide/commoncaveats.xml
+++ b/system/doc/efficiency_guide/commoncaveats.xml
@@ -169,53 +169,5 @@ multiple_setelement(T0) ->
         {Bin1,Bin2} = split_binary(Bin, Num)</code>
    </section>
 
-  <section>
-    <title>Operator "--"</title>
-     <p>The "<c>--</c>" operator has a complexity
-     proportional to the product of the length of its operands.
-     This means that the operator is very slow if both of its operands
-     are long lists:</p>
-
-        <p><em>DO NOT</em></p>
-        <code type="none"><![CDATA[
-        HugeList1 -- HugeList2]]></code>
-
-     <p>Instead use the <seealso marker="stdlib:ordsets">ordsets</seealso>
-     module in STDLIB:</p>
-
-        <p><em>DO</em></p>
-        <code type="none">
-        HugeSet1 = ordsets:from_list(HugeList1),
-        HugeSet2 = ordsets:from_list(HugeList2),
-        ordsets:subtract(HugeSet1, HugeSet2)</code>
-
-     <p>Obviously, that code does not work if the original order
-     of the list is important. If the order of the list must be
-     preserved, do as follows:</p>
-
-        <p><em>DO</em></p>
-        <code type="none"><![CDATA[
-        Set = gb_sets:from_list(HugeList2),
-        [E || E <- HugeList1, not gb_sets:is_element(E, Set)]]]></code>
-
-     <note><p>This code behaves differently from "<c>--</c>"
-     if the lists contain duplicate elements (one occurrence
-     of an element in HugeList2 removes <em>all</em>
-     occurrences in HugeList1.)</p>
-     <p>Also, this code compares lists elements using the
-     "<c>==</c>" operator, while "<c>--</c>" uses the "<c>=:=</c>" operator.
-     If that difference is important, <c>sets</c> can be used instead of
-     <c>gb_sets</c>, but <c>sets:from_list/1</c> is much
-     slower than <c>gb_sets:from_list/1</c> for long lists.</p></note>
-
-     <p>Using the "<c>--</c>" operator to delete an element
-     from a list is not a performance problem:</p>
-
-        <p><em>OK</em></p>
-        <code type="none">
-        HugeList1 -- [Element]</code>
-
-   </section>
-
 </chapter>
 
diff --git a/system/doc/efficiency_guide/retired_myths.xml b/system/doc/efficiency_guide/retired_myths.xml
index 9b914a3b6e..144c942c2b 100644
--- a/system/doc/efficiency_guide/retired_myths.xml
+++ b/system/doc/efficiency_guide/retired_myths.xml
@@ -60,4 +60,18 @@
     That leads us to the myth that tail-recursive functions are faster
     than body-recursive functions.</p>
   </section>
+
+  <section>
+    <title>Myth: List subtraction ("--" operator) is slow</title>
+
+    <p>List subtraction used to have a run-time complexity proportional to the
+      product of the lengths of its operands, so it was extremely slow when both
+      lists were long.</p>
+
+    <p>As of OTP 22 the run-time complexity is "n log n" and the operation will
+      complete quickly even when both lists are very long. In fact, it is
+      faster and uses less memory than the commonly used workaround of
+      converting both lists to ordered sets before subtracting them with
+      <c>ordsets:subtract/2</c>.</p>
+  </section>
 </chapter>