34 files changed, 3601 insertions, 278 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 054692819e..57a9d45887 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -633,21 +633,24 @@ GENERATE += $(TTF_DIR)/driver_tab.c
 # This list must be consistent with PRE_LOADED_MODULES in
 # erts/preloaded/src/Makefile.
 
-PRELOAD_BEAM =		$(ERL_TOP)/erts/preloaded/ebin/otp_ring0.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/erts_code_purger.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/init.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/prim_buffer.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/prim_eval.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/prim_inet.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/prim_file.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/zlib.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/prim_zip.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/erl_prim_loader.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/erlang.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/erts_internal.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/erl_tracer.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/erts_literal_area_collector.beam \
-			$(ERL_TOP)/erts/preloaded/ebin/erts_dirty_process_signal_handler.beam
+PRELOAD_BEAM =	$(ERL_TOP)/erts/preloaded/ebin/otp_ring0.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/erts_code_purger.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/init.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/prim_buffer.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/prim_eval.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/prim_inet.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/prim_file.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/zlib.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/prim_zip.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/erl_prim_loader.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/erlang.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/erts_internal.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/erl_tracer.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/erts_literal_area_collector.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/erts_dirty_process_signal_handler.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/atomics.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/counters.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/persistent_term.beam
 
 ifeq ($(TARGET),win32)
 # On windows the preloaded objects are in a resource object.
@@ -839,6 +842,8 @@ RUN_OBJS += \
 	$(OBJDIR)/erl_bif_ddll.o  	$(OBJDIR)/erl_bif_guard.o \
 	$(OBJDIR)/erl_bif_info.o	$(OBJDIR)/erl_bif_op.o \
 	$(OBJDIR)/erl_bif_os.o		$(OBJDIR)/erl_bif_lists.o \
+	$(OBJDIR)/erl_bif_persistent.o \
+	$(OBJDIR)/erl_bif_atomics.o    	$(OBJDIR)/erl_bif_counters.o \
 	$(OBJDIR)/erl_bif_trace.o	$(OBJDIR)/erl_bif_unique.o \
 	$(OBJDIR)/erl_bif_wrap.o	$(OBJDIR)/erl_nfunc_sched.o \
 	$(OBJDIR)/erl_guard_bifs.o 	$(OBJDIR)/erl_dirty_bif_wrap.o \
diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c
index 5381611fab..59b51fd15e 100644
--- a/erts/emulator/beam/atom.c
+++ b/erts/emulator/beam/atom.c
@@ -174,7 +174,7 @@ atom_alloc(Atom* tmpl)
 
     /*
      * Precompute ordinal value of first 3 bytes + 7 bits.
-     * This is used by utils.c:erts_cmp_atoms().
+     * This is used by erl_utils.h:erts_cmp_atoms().
      * We cannot use the full 32 bits of the first 4 bytes,
      * since we use the sign of the difference between two
      * ordinal values to represent their relative order.
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index 45b7540aeb..a14f22b19e 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -182,6 +182,7 @@ atom control
 atom copy
 atom copy_literals
 atom counters
+atom count
 atom cpu
 atom cpu_timestamp
 atom cr
@@ -287,6 +288,7 @@ atom gc_minor_end
 atom gc_minor_start
 atom Ge='>='
 atom generational
+atom get_all_trap
 atom get_seq_token
 atom get_tcw
 atom gather_gc_info_result
@@ -325,6 +327,7 @@ atom index
 atom infinity
 atom info
 atom info_msg
+atom info_trap
 atom init
 atom initial_call
 atom input
@@ -393,6 +396,7 @@ atom microsecond
 atom microstate_accounting
 atom milli_seconds
 atom millisecond
+atom min
 atom min_heap_size
 atom min_bin_vheap_size
 atom minor
diff --git a/erts/emulator/beam/beam_bif_load.c b/erts/emulator/beam/beam_bif_load.c
index d221e6aea6..bb1b2e5b27 100644
--- a/erts/emulator/beam/beam_bif_load.c
+++ b/erts/emulator/beam/beam_bif_load.c
@@ -1752,29 +1752,7 @@ BIF_RETTYPE erts_internal_purge_module_2(BIF_ALIST_2)
 	finalize_purge_operation(BIF_P, ret == am_true);
 
 	if (literals) {
-	    ErtsLiteralAreaRef *ref;
-            ErtsMessage *mp;
-	    ref = erts_alloc(ERTS_ALC_T_LITERAL_REF,
-			     sizeof(ErtsLiteralAreaRef));
-	    ref->literal_area = literals;
-	    ref->next = NULL;
-	    erts_mtx_lock(&release_literal_areas.mtx);
-	    if (release_literal_areas.last) {
-		release_literal_areas.last->next = ref;
-		release_literal_areas.last = ref;
-	    }
-	    else {
-		release_literal_areas.first = ref;
-		release_literal_areas.last = ref;
-	    }
-	    erts_mtx_unlock(&release_literal_areas.mtx);
-            mp = erts_alloc_message(0, NULL);
-            ERL_MESSAGE_TOKEN(mp) = am_undefined;
-	    erts_queue_proc_message(BIF_P,
-                               erts_literal_area_collector,
-			       0,
-			       mp,
-			       am_copy_literals);
+            erts_queue_release_literals(BIF_P, literals);
 	}
 
 	return ret;
@@ -1786,6 +1764,41 @@ BIF_RETTYPE erts_internal_purge_module_2(BIF_ALIST_2)
     }
 }
 
+void
+erts_queue_release_literals(Process* c_p, ErtsLiteralArea* literals)
+{
+    ErtsLiteralAreaRef *ref;
+    ErtsMessage *mp;
+    ref = erts_alloc(ERTS_ALC_T_LITERAL_REF,
+                     sizeof(ErtsLiteralAreaRef));
+    ref->literal_area = literals;
+    ref->next = NULL;
+    erts_mtx_lock(&release_literal_areas.mtx);
+    if (release_literal_areas.last) {
+        release_literal_areas.last->next = ref;
+        release_literal_areas.last = ref;
+    } else {
+        release_literal_areas.first = ref;
+        release_literal_areas.last = ref;
+    }
+    erts_mtx_unlock(&release_literal_areas.mtx);
+    mp = erts_alloc_message(0, NULL);
+    ERL_MESSAGE_TOKEN(mp) = am_undefined;
+    if (c_p == NULL) {
+        erts_queue_message(erts_literal_area_collector,
+                           0,
+                           mp,
+                           am_copy_literals,
+                           am_system);
+    } else {
+        erts_queue_proc_message(c_p,
+                                erts_literal_area_collector,
+                                0,
+                                mp,
+                                am_copy_literals);
+    }
+}
+
 /*
  * Move code from current to old and null all export entries for the module
  */
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index ab5920a67e..e909a0b4da 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -579,6 +579,7 @@ init_emulator(void)
  * the instructions' C labels to the loader.
  * The second call starts execution of BEAM code. This call never returns.
  */
+ERTS_NO_RETPOLINE
 void process_main(Eterm * x_reg_array, FloatDef* f_reg_array)
 {
     static int init_done = 0;
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index 7548924178..d4ba90a61a 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -40,6 +40,7 @@
 # Note: Guards BIFs usually require special support in the compiler.
 #
 
+
 gcbif erlang:abs/1
 bif erlang:adler32/1
 bif erlang:adler32/2
@@ -698,3 +699,29 @@ ubif erlang:map_get/2
 ubif erlang:is_map_key/2
 bif ets:internal_delete_all/2
 bif ets:internal_select_delete/2
+
+#
+# New in 21.2
+#
+
+bif persistent_term:put/2
+bif persistent_term:get/1
+bif persistent_term:get/0
+bif persistent_term:erase/1
+bif persistent_term:info/0
+bif erts_internal:erase_persistent_terms/0
+
+bif erts_internal:atomics_new/2
+bif atomics:get/2
+bif atomics:put/3
+bif atomics:add/3
+bif atomics:add_get/3
+bif atomics:exchange/3
+bif atomics:compare_exchange/4
+bif atomics:info/1
+
+bif erts_internal:counters_new/1
+bif erts_internal:counters_get/2
+bif erts_internal:counters_add/3
+bif erts_internal:counters_put/3
+bif erts_internal:counters_info/1
diff --git a/erts/emulator/beam/copy.c b/erts/emulator/beam/copy.c
index e7bfd04b73..e7bd046e18 100644
--- a/erts/emulator/beam/copy.c
+++ b/erts/emulator/beam/copy.c
@@ -1074,6 +1074,7 @@ Uint copy_shared_calculate(Eterm obj, erts_shcopy_t *info)
     Eterm* ptr;
     Eterm *lit_purge_ptr = info->lit_purge_ptr;
     Uint lit_purge_sz = info->lit_purge_sz;
+    int copy_literals = info->copy_literals;
 #ifdef DEBUG
     Eterm mypid = erts_get_current_pid();
 #endif
@@ -1119,7 +1120,7 @@ Uint copy_shared_calculate(Eterm obj, erts_shcopy_t *info)
 	    /* off heap list pointers are copied verbatim */
 	    if (erts_is_literal(obj,ptr)) {
 		VERBOSE(DEBUG_SHCOPY, ("[pid=%T] bypassed copying %p is %T\n", mypid, ptr, obj));
-                if (in_literal_purge_area(ptr))
+                if (copy_literals || in_literal_purge_area(ptr))
                     info->literal_size += size_object(obj);
 		goto pop_next;
 	    }
@@ -1170,7 +1171,7 @@ Uint copy_shared_calculate(Eterm obj, erts_shcopy_t *info)
 	    /* off heap pointers to boxes are copied verbatim */
 	    if (erts_is_literal(obj,ptr)) {
 		VERBOSE(DEBUG_SHCOPY, ("[pid=%T] bypassed copying %p is %T\n", mypid, ptr, obj));
-                if (in_literal_purge_area(ptr))
+                if (copy_literals || in_literal_purge_area(ptr))
                     info->literal_size += size_object(obj);
 		goto pop_next;
 	    }
@@ -1338,6 +1339,7 @@ Uint copy_shared_perform(Eterm obj, Uint size, erts_shcopy_t *info,
     unsigned remaining;
     Eterm *lit_purge_ptr = info->lit_purge_ptr;
     Uint lit_purge_sz = info->lit_purge_sz;
+    int copy_literals = info->copy_literals;
 #ifdef DEBUG
     Eterm mypid = erts_get_current_pid();
     Eterm saved_obj = obj;
@@ -1387,7 +1389,7 @@ Uint copy_shared_perform(Eterm obj, Uint size, erts_shcopy_t *info,
 	    ptr = list_val(obj);
 	    /* off heap list pointers are copied verbatim */
 	    if (erts_is_literal(obj,ptr)) {
-                if (!in_literal_purge_area(ptr)) {
+                if (!(copy_literals || in_literal_purge_area(ptr))) {
                     *resp = obj;
                 } else {
                     Uint bsz = 0;
@@ -1455,7 +1457,7 @@ Uint copy_shared_perform(Eterm obj, Uint size, erts_shcopy_t *info,
 	    ptr = boxed_val(obj);
 	    /* off heap pointers to boxes are copied verbatim */
 	    if (erts_is_literal(obj,ptr)) {
-                if (!in_literal_purge_area(ptr)) {
+                if (!(copy_literals || in_literal_purge_area(ptr))) {
                     *resp = obj;
                 } else {
                     Uint bsz = 0;
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 5409b89bab..4f03a34390 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -274,9 +274,13 @@ type	ML_YIELD_STATE	SHORT_LIVED	SYSTEM		monitor_link_yield_state
 type	ML_DIST		STANDARD	SYSTEM		monitor_link_dist
 type	PF3_ARGS	SHORT_LIVED	PROCESSES	process_flag_3_arguments
 type	SETUP_CONN_ARG	SHORT_LIVED	PROCESSES	setup_connection_argument
+type    LIST_TRAP       SHORT_LIVED     PROCESSES       list_bif_trap_state
 
 type	ENVIRONMENT	SYSTEM		SYSTEM		environment
 
+type	PERSISTENT_TERM	LONG_LIVED	CODE		persisten_term
+type	PERSISTENT_LOCK_Q SHORT_LIVED	SYSTEM		persistent_lock_q
+
 #
 # Types used for special emulators
 #
@@ -334,6 +338,8 @@ type	GC_INFO_REQ	SHORT_LIVED	SYSTEM		gc_info_request
 type	PORT_DATA_HEAP	STANDARD	SYSTEM		port_data_heap
 type    MSACC           DRIVER          SYSTEM          microstate_accounting
 type	SYS_CHECK_REQ	SHORT_LIVED	SYSTEM		system_check_request
+type	ATOMICS		STANDARD	SYSTEM		erl_bif_atomics
+type	COUNTERS	STANDARD	SYSTEM		erl_bif_counters
 
 #
 # Types used by system specific code
diff --git a/erts/emulator/beam/erl_bif_atomics.c b/erts/emulator/beam/erl_bif_atomics.c
new file mode 100644
index 0000000000..092dbb3bd3
--- /dev/null
+++ b/erts/emulator/beam/erl_bif_atomics.c
@@ -0,0 +1,256 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2018. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Purpose:  High performance atomics.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <stddef.h> /* offsetof */
+
+#include "sys.h"
+#include "export.h"
+#include "bif.h"
+#include "erl_threads.h"
+#include "big.h"
+#include "erl_binary.h"
+#include "erl_bif_unique.h"
+#include "erl_map.h"
+
+typedef struct
+{
+    int is_signed;
+    UWord vlen;
+    erts_atomic64_t v[1];
+}AtomicsRef;
+
+static int atomics_destructor(Binary *unused)
+{
+    return 1;
+}
+
+#define OPT_SIGNED (1 << 0)
+
+BIF_RETTYPE erts_internal_atomics_new_2(BIF_ALIST_2)
+{
+    AtomicsRef* p;
+    Binary* mbin;
+    UWord i, cnt, opts;
+    Uint bytes;
+    Eterm* hp;
+
+    if (!term_to_UWord(BIF_ARG_1, &cnt)
+        || cnt == 0
+        || !term_to_UWord(BIF_ARG_2, &opts)) {
+
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    if (cnt > (ERTS_UWORD_MAX / sizeof(p->v[0])))
+        BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+
+    bytes = offsetof(AtomicsRef, v) + cnt*sizeof(p->v[0]);
+    mbin = erts_create_magic_binary_x(bytes,
+                                      atomics_destructor,
+                                      ERTS_ALC_T_ATOMICS,
+                                      0);
+    p = ERTS_MAGIC_BIN_DATA(mbin);
+    p->is_signed = opts & OPT_SIGNED;
+    p->vlen = cnt;
+    for (i=0; i < cnt; i++)
+        erts_atomic64_init_nob(&p->v[i], 0);
+    hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE);
+    return erts_mk_magic_ref(&hp, &MSO(BIF_P), mbin);
+}
+
+static ERTS_INLINE int get_ref(Eterm ref, AtomicsRef** pp)
+{
+    Binary* mbin;
+    if (!is_internal_magic_ref(ref))
+        return 0;
+
+    mbin = erts_magic_ref2bin(ref);
+    if (ERTS_MAGIC_BIN_DESTRUCTOR(mbin) != atomics_destructor)
+        return 0;
+    *pp = ERTS_MAGIC_BIN_DATA(mbin);
+    return 1;
+}
+
+static ERTS_INLINE int get_ref_ix(Eterm ref, Eterm ix,
+                                  AtomicsRef** pp, UWord* ixp)
+{
+    return (get_ref(ref, pp)
+            && term_to_UWord(ix, ixp)
+            && --(*ixp) < (*pp)->vlen);
+}
+
+static ERTS_INLINE int get_value(AtomicsRef* p, Eterm term, erts_aint64_t *valp)
+{
+    return (p->is_signed ?
+            term_to_Sint64(term, (Sint64*)valp) :
+            term_to_Uint64(term, (Uint64*)valp));
+}
+
+static ERTS_INLINE int get_incr(AtomicsRef* p, Eterm term, erts_aint64_t *valp)
+{
+    return (term_to_Sint64(term, (Sint64*)valp)
+            || term_to_Uint64(term, (Uint64*)valp));
+}
+
+static ERTS_INLINE Eterm bld_atomic(Process* proc, AtomicsRef* p,
+                                    erts_aint64_t val)
+{
+    if (p->is_signed) {
+        if (IS_SSMALL(val))
+            return make_small((Sint) val);
+        else {
+            Uint hsz = ERTS_SINT64_HEAP_SIZE(val);
+            Eterm* hp = HAlloc(proc, hsz);
+            return erts_sint64_to_big(val, &hp);
+        }
+    }
+    else {
+        if ((Uint64)val <= MAX_SMALL)
+            return make_small((Sint) val);
+        else {
+            Uint hsz = ERTS_UINT64_HEAP_SIZE(val);
+            Eterm* hp = HAlloc(proc, hsz);
+            return erts_uint64_to_big(val, &hp);
+        }
+    }
+}
+
+BIF_RETTYPE atomics_put_3(BIF_ALIST_3)
+{
+    AtomicsRef* p;
+    UWord ix;
+    erts_aint64_t val;
+
+    if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)
+        || !get_value(p, BIF_ARG_3, &val)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    erts_atomic64_set_mb(&p->v[ix], val);
+    return am_ok;
+}
+
+BIF_RETTYPE atomics_get_2(BIF_ALIST_2)
+{
+    AtomicsRef* p;
+    UWord ix;
+
+    if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    return bld_atomic(BIF_P, p, erts_atomic64_read_mb(&p->v[ix]));
+}
+
+BIF_RETTYPE atomics_add_3(BIF_ALIST_3)
+{
+    AtomicsRef* p;
+    UWord ix;
+    erts_aint64_t incr;
+
+    if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)
+        || !get_incr(p, BIF_ARG_3, &incr)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    erts_atomic64_add_mb(&p->v[ix], incr);
+    return am_ok;
+}
+
+BIF_RETTYPE atomics_add_get_3(BIF_ALIST_3)
+{
+    AtomicsRef* p;
+    UWord ix;
+    erts_aint64_t incr;
+
+    if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)
+        || !get_incr(p, BIF_ARG_3, &incr)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    return bld_atomic(BIF_P, p, erts_atomic64_add_read_mb(&p->v[ix], incr));
+}
+
+BIF_RETTYPE atomics_exchange_3(BIF_ALIST_3)
+{
+    AtomicsRef* p;
+    UWord ix;
+    erts_aint64_t desired, was;
+
+    if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)
+        || !get_value(p, BIF_ARG_3, &desired)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    was = erts_atomic64_xchg_mb(&p->v[ix], desired);
+    return bld_atomic(BIF_P, p, was);
+}
+
+BIF_RETTYPE atomics_compare_exchange_4(BIF_ALIST_4)
+{
+    AtomicsRef* p;
+    UWord ix;
+    erts_aint64_t expected, desired, was;
+
+    if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)
+        || !get_value(p, BIF_ARG_3, &expected)
+        || !get_value(p, BIF_ARG_4, &desired)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    was = erts_atomic64_cmpxchg_mb(&p->v[ix], desired, expected);
+    return was == expected ? am_ok : bld_atomic(BIF_P, p, was);
+}
+
+BIF_RETTYPE atomics_info_1(BIF_ALIST_1)
+{
+    AtomicsRef* p;
+    Uint hsz = MAP4_SZ;
+    Eterm *hp;
+    Uint64 max;
+    Sint64 min;
+    UWord memory;
+    Eterm max_val, min_val, sz_val, mem_val;
+
+    if (!get_ref(BIF_ARG_1, &p))
+        BIF_ERROR(BIF_P, BADARG);
+
+    max = p->is_signed ? ERTS_SINT64_MAX : ERTS_UINT64_MAX;
+    min = p->is_signed ? ERTS_SINT64_MIN : 0;
+    memory = erts_magic_ref2bin(BIF_ARG_1)->orig_size;
+
+    erts_bld_uint64(NULL, &hsz, max);
+    erts_bld_sint64(NULL, &hsz, min);
+    erts_bld_uword(NULL, &hsz, p->vlen);
+    erts_bld_uword(NULL, &hsz, memory);
+
+    hp = HAlloc(BIF_P, hsz);
+    max_val = erts_bld_uint64(&hp, NULL, max);
+    min_val = erts_bld_sint64(&hp, NULL, min);
+    sz_val  = erts_bld_uword(&hp, NULL, p->vlen);
+    mem_val = erts_bld_uword(&hp, NULL, memory);
+
+    return MAP4(hp, am_max, max_val,
+                am_memory, mem_val,
+                am_min, min_val,
+                am_size, sz_val);
+}
diff --git a/erts/emulator/beam/erl_bif_counters.c b/erts/emulator/beam/erl_bif_counters.c
new file mode 100644
index 0000000000..7c8884ba32
--- /dev/null
+++ b/erts/emulator/beam/erl_bif_counters.c
@@ -0,0 +1,255 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2018. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Purpose:  The implementation for 'counters' with 'write_concurrency'.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <stddef.h> /* offsetof */
+
+#include "sys.h"
+#include "export.h"
+#include "bif.h"
+#include "erl_threads.h"
+#include "big.h"
+#include "erl_binary.h"
+#include "erl_bif_unique.h"
+#include "erl_map.h"
+
+/*
+ * Each logical counter consists of one 64-bit atomic instance per scheduler
+ * plus one instance for the "base value".
+ *
+ * get() reads all atomics for the counter and returns the sum.
+ * add() reads and writes only its own scheduler specific atomic instance.
+ * put() reads all scheduler specific atomics and writes a new base value.
+ */
+#define ATOMICS_PER_COUNTER (erts_no_schedulers + 1)
+
+#define ATOMICS_PER_CACHE_LINE (ERTS_CACHE_LINE_SIZE / sizeof(erts_atomic64_t))
+
+typedef struct
+{
+    UWord arity;
+#ifdef DEBUG
+    UWord ulen;
+#endif
+    union {
+        erts_atomic64_t v[ATOMICS_PER_CACHE_LINE];
+        byte cache_line__[ERTS_CACHE_LINE_SIZE];
+    } u[1];
+}CountersRef;
+
+static int counters_destructor(Binary *mbin)
+{
+    return 1;
+}
+
+
+static UWord ERTS_INLINE div_ceil(UWord dividend, UWord divisor)
+{
+    return (dividend + divisor - 1) / divisor;
+}
+
+BIF_RETTYPE erts_internal_counters_new_1(BIF_ALIST_1)
+{
+    CountersRef* p;
+    Binary* mbin;
+    UWord ui, vi, cnt;
+    Uint bytes, cache_lines;
+    Eterm* hp;
+
+    if (!term_to_UWord(BIF_ARG_1, &cnt)
+        || cnt == 0) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    if (cnt > (ERTS_UWORD_MAX / (sizeof(erts_atomic64_t)*2*ATOMICS_PER_COUNTER)))
+        BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+
+    cache_lines = ATOMICS_PER_COUNTER * div_ceil(cnt, ATOMICS_PER_CACHE_LINE);
+    bytes = offsetof(CountersRef, u) + cache_lines * ERTS_CACHE_LINE_SIZE;
+    mbin = erts_create_magic_binary_x(bytes,
+                                      counters_destructor,
+                                      ERTS_ALC_T_ATOMICS,
+                                      0);
+    p = ERTS_MAGIC_BIN_DATA(mbin);
+    p->arity = cnt;
+
+#ifdef DEBUG
+    p->ulen = cache_lines;
+#endif
+    ASSERT((byte*)&p->u[cache_lines] <= ((byte*)p + bytes));
+    for (ui=0; ui < cache_lines; ui++)
+        for (vi=0; vi < ATOMICS_PER_CACHE_LINE; vi++)
+            erts_atomic64_init_nob(&p->u[ui].v[vi], 0);
+    hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE);
+    return erts_mk_magic_ref(&hp, &MSO(BIF_P), mbin);
+}
+
+static ERTS_INLINE int get_ref(Eterm ref, CountersRef** pp)
+{
+    Binary* mbin;
+    if (!is_internal_magic_ref(ref))
+        return 0;
+
+    mbin = erts_magic_ref2bin(ref);
+    if (ERTS_MAGIC_BIN_DESTRUCTOR(mbin) != counters_destructor)
+        return 0;
+    *pp = ERTS_MAGIC_BIN_DATA(mbin);
+    return 1;
+}
+
+static ERTS_INLINE int get_ref_cnt(Eterm ref, Eterm index,
+                                   CountersRef** pp,
+                                   erts_atomic64_t** app,
+                                   UWord sched_ix)
+{
+    CountersRef* p;
+    UWord ix, ui, vi;
+    if (!get_ref(ref, &p) || !term_to_UWord(index, &ix) || --ix >= p->arity)
+        return 0;
+    ui = (ix / ATOMICS_PER_CACHE_LINE) * ATOMICS_PER_COUNTER + sched_ix;
+    vi = ix % ATOMICS_PER_CACHE_LINE;
+    ASSERT(ui < p->ulen);
+    *pp = p;
+    *app = &p->u[ui].v[vi];
+    return 1;
+}
+
+static ERTS_INLINE int get_ref_my_cnt(Eterm ref, Eterm index,
+                                      CountersRef** pp,
+                                      erts_atomic64_t** app)
+{
+    ErtsSchedulerData *esdp = erts_get_scheduler_data();
+    ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp));
+    ASSERT(esdp->no > 0 && esdp->no < ATOMICS_PER_COUNTER);
+    return get_ref_cnt(ref, index, pp, app, esdp->no);
+}
+
+static ERTS_INLINE int get_ref_first_cnt(Eterm ref, Eterm index,
+                                         CountersRef** pp,
+                                         erts_atomic64_t** app)
+{
+    return get_ref_cnt(ref, index, pp, app, 0);
+}
+
+static ERTS_INLINE int get_incr(CountersRef* p, Eterm term, erts_aint64_t *valp)
+{
+    return (term_to_Sint64(term, (Sint64*)valp)
+            || term_to_Uint64(term, (Uint64*)valp));
+}
+
+static ERTS_INLINE Eterm bld_counter(Process* proc, CountersRef* p,
+                                     erts_aint64_t val)
+{
+    if (IS_SSMALL(val))
+        return make_small((Sint) val);
+    else {
+        Uint hsz = ERTS_SINT64_HEAP_SIZE(val);
+        Eterm* hp = HAlloc(proc, hsz);
+        return erts_sint64_to_big(val, &hp);
+    }
+}
+
+BIF_RETTYPE erts_internal_counters_get_2(BIF_ALIST_2)
+{
+    CountersRef* p;
+    erts_atomic64_t* ap;
+    erts_aint64_t acc = 0;
+    int j;
+
+    if (!get_ref_first_cnt(BIF_ARG_1, BIF_ARG_2, &p, &ap)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    for (j = ATOMICS_PER_COUNTER; j ; --j) {
+        acc += erts_atomic64_read_nob(ap);
+        ap = (erts_atomic64_t*) ((byte*)ap + ERTS_CACHE_LINE_SIZE);
+    }
+    return bld_counter(BIF_P, p, acc);
+}
+
+BIF_RETTYPE erts_internal_counters_add_3(BIF_ALIST_3)
+{
+    CountersRef* p;
+    erts_atomic64_t* ap;
+    erts_aint64_t incr, sum;
+
+    if (!get_ref_my_cnt(BIF_ARG_1, BIF_ARG_2, &p, &ap)
+        || !get_incr(p, BIF_ARG_3, &incr)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    sum = incr + erts_atomic64_read_nob(ap);
+    erts_atomic64_set_nob(ap, sum);
+    return am_ok;
+}
+
+BIF_RETTYPE erts_internal_counters_put_3(BIF_ALIST_3)
+{
+    CountersRef* p;
+    erts_atomic64_t* first_ap;
+    erts_atomic64_t* ap;
+    erts_aint64_t acc;
+    erts_aint64_t val;
+    int j;
+
+    if (!get_ref_first_cnt(BIF_ARG_1, BIF_ARG_2, &p, &first_ap)
+        || !term_to_Sint64(BIF_ARG_3, &val)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    ap = first_ap;
+    acc = 0;
+    j = ATOMICS_PER_COUNTER - 1;
+    do {
+        ap = (erts_atomic64_t*) ((byte*)ap + ERTS_CACHE_LINE_SIZE);
+        acc += erts_atomic64_read_nob(ap);
+    } while (--j);
+    erts_atomic64_set_nob(first_ap, val-acc);
+
+    return am_ok;
+}
+
+BIF_RETTYPE erts_internal_counters_info_1(BIF_ALIST_1)
+{
+    CountersRef* p;
+    Uint hsz = MAP2_SZ;
+    Eterm *hp;
+    UWord memory;
+    Eterm sz_val, mem_val;
+
+    if (!get_ref(BIF_ARG_1, &p))
+        BIF_ERROR(BIF_P, BADARG);
+
+    memory = erts_magic_ref2bin(BIF_ARG_1)->orig_size;
+    erts_bld_uword(NULL, &hsz, p->arity);
+    erts_bld_uword(NULL, &hsz, memory);
+
+    hp = HAlloc(BIF_P, hsz);
+    sz_val  = erts_bld_uword(&hp, NULL, p->arity);
+    mem_val = erts_bld_uword(&hp, NULL, memory);
+
+    return MAP2(hp, am_memory, mem_val,
+                am_size, sz_val);
+}
diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c
index 395be67a90..a793b34852 100644
--- a/erts/emulator/beam/erl_bif_lists.c
+++ b/erts/emulator/beam/erl_bif_lists.c
@@ -29,12 +29,13 @@
 #include "sys.h"
 #include "erl_vm.h"
 #include "global.h"
-#include "erl_process.h"
-#include "error.h"
 #include "bif.h"
+#include "erl_binary.h"
+
 
 static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List);
 
+
 static BIF_RETTYPE append(Process* p, Eterm A, Eterm B)
 {
     Eterm list;
@@ -146,103 +147,724 @@ BIF_RETTYPE append_2(BIF_ALIST_2)
     return append(BIF_P, BIF_ARG_1, BIF_ARG_2);
 }
 
-/*
- * erlang:'--'/2
- */
+/* erlang:'--'/2
+ *
+ * Subtracts a list from another (LHS -- RHS), removing the first occurrence of
+ * each element in LHS from RHS. There is no type coercion so the elements must
+ * match exactly.
+ *
+ * The BIF is broken into several stages that can all trap individually, and it
+ * chooses its algorithm based on input size. If either input is small it will
+ * use a linear scan tuned to which side it's on, and if both inputs are large
+ * enough it will convert RHS into a multiset to provide good asymptotic
+ * behavior. */
+
+#define SUBTRACT_LHS_THRESHOLD 16
+#define SUBTRACT_RHS_THRESHOLD 16
+
+typedef enum {
+    SUBTRACT_STAGE_START,
+    SUBTRACT_STAGE_LEN_LHS,
+
+    /* Naive linear scan that's efficient when
+     * LEN_LHS <= SUBTRACT_LHS_THRESHOLD. */
+    SUBTRACT_STAGE_NAIVE_LHS,
+
+    SUBTRACT_STAGE_LEN_RHS,
+
+    /* As SUBTRACT_STAGE_NAIVE_LHS but for RHS. */
+    SUBTRACT_STAGE_NAIVE_RHS,
+
+    /* Creates a multiset from RHS for faster lookups before sweeping through
+     * LHS. The set is implemented as a red-black tree and duplicate elements
+     * are handled by a counter on each node. */
+    SUBTRACT_STAGE_SET_BUILD,
+    SUBTRACT_STAGE_SET_FINISH
+} ErtsSubtractCtxStage;
+
+typedef struct subtract_node__ {
+    struct subtract_node__ *parent;
+    struct subtract_node__ *left;
+    struct subtract_node__ *right;
+    int is_red;
+
+    Eterm key;
+    Uint count;
+} subtract_tree_t;
+
+typedef struct {
+    ErtsSubtractCtxStage stage;
+
+    Eterm lhs_original;
+    Eterm rhs_original;
+
+    Uint lhs_remaining;
+    Uint rhs_remaining;
+
+    Eterm iterator;
+
+    Eterm *result_cdr;
+    Eterm result;
+
+    union {
+        Eterm lhs_elements[SUBTRACT_LHS_THRESHOLD];
+        Eterm rhs_elements[SUBTRACT_RHS_THRESHOLD];
+
+        struct {
+            subtract_tree_t *tree;
+
+            /* A memory area for the tree's nodes, saving us the need to have
+             * one allocation per node. */
+            subtract_tree_t *alloc_start;
+            subtract_tree_t *alloc;
+        } rhs_set;
+    } u;
+} ErtsSubtractContext;
+
+#define ERTS_RBT_PREFIX subtract
+#define ERTS_RBT_T subtract_tree_t
+#define ERTS_RBT_KEY_T Eterm
+#define ERTS_RBT_FLAGS_T int
+#define ERTS_RBT_INIT_EMPTY_TNODE(T) \
+    do { \
+        (T)->parent = NULL; \
+        (T)->left = NULL; \
+        (T)->right = NULL; \
+    } while(0)
+#define ERTS_RBT_IS_RED(T) ((T)->is_red)
+#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1)
+#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T))
+#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0)
+#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red)
+#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F)
+#define ERTS_RBT_GET_PARENT(T) ((T)->parent)
+#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P)
+#define ERTS_RBT_GET_RIGHT(T) ((T)->right)
+#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R))
+#define ERTS_RBT_GET_LEFT(T) ((T)->left)
+#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L))
+#define ERTS_RBT_GET_KEY(T) ((T)->key)
+#define ERTS_RBT_CMP_KEYS(KX, KY) CMP_TERM(KX, KY)
+#define ERTS_RBT_WANT_LOOKUP_INSERT
+#define ERTS_RBT_WANT_LOOKUP
+#define ERTS_RBT_WANT_DELETE
+#define ERTS_RBT_UNDEF
+
+#include "erl_rbtree.h"
+
+static int subtract_continue(Process *p, ErtsSubtractContext *context);
+
+static void subtract_ctx_dtor(ErtsSubtractContext *context) {
+    switch (context->stage) {
+        case SUBTRACT_STAGE_SET_BUILD:
+        case SUBTRACT_STAGE_SET_FINISH:
+            erts_free(ERTS_ALC_T_LIST_TRAP, context->u.rhs_set.alloc_start);
+            break;
+        default:
+            break;
+    }
+}
 
-#define SMALL_VEC_SIZE 10
-static Eterm subtract(Process* p, Eterm A, Eterm B)
-{
-    Eterm  list;
-    Eterm* hp;
-    Uint  need;
-    Eterm  res;
-    Eterm small_vec[SMALL_VEC_SIZE];	/* Preallocated memory for small lists */
-    Eterm* vec_p;
-    Eterm* vp;
-    Sint i;
-    Sint n;
-    Sint m;
-    
-    if ((n = erts_list_length(A)) < 0) {
-	BIF_ERROR(p, BADARG);
+static int subtract_ctx_bin_dtor(Binary *context_bin) {
+    ErtsSubtractContext *context = ERTS_MAGIC_BIN_DATA(context_bin);
+    subtract_ctx_dtor(context);
+    return 1;
+}
+
+static void subtract_ctx_move(ErtsSubtractContext *from,
+                              ErtsSubtractContext *to) {
+    int uses_result_cdr = 0;
+
+    to->stage = from->stage;
+
+    to->lhs_original = from->lhs_original;
+    to->rhs_original = from->rhs_original;
+
+    to->lhs_remaining = from->lhs_remaining;
+    to->rhs_remaining = from->rhs_remaining;
+
+    to->iterator = from->iterator;
+    to->result = from->result;
+
+    switch (to->stage) {
+        case SUBTRACT_STAGE_NAIVE_LHS:
+            sys_memcpy(to->u.lhs_elements,
+                       from->u.lhs_elements,
+                       sizeof(Eterm) * to->lhs_remaining);
+            break;
+        case SUBTRACT_STAGE_NAIVE_RHS:
+            sys_memcpy(to->u.rhs_elements,
+                       from->u.rhs_elements,
+                       sizeof(Eterm) * to->rhs_remaining);
+
+            uses_result_cdr = 1;
+            break;
+        case SUBTRACT_STAGE_SET_FINISH:
+            uses_result_cdr = 1;
+            /* FALL THROUGH */
+        case SUBTRACT_STAGE_SET_BUILD:
+            to->u.rhs_set.alloc_start = from->u.rhs_set.alloc_start;
+            to->u.rhs_set.alloc = from->u.rhs_set.alloc;
+            to->u.rhs_set.tree = from->u.rhs_set.tree;
+            break;
+        default:
+            break;
     }
-    if ((m = erts_list_length(B)) < 0) {
-	BIF_ERROR(p, BADARG);
+
+    if (uses_result_cdr) {
+        if (from->result_cdr == &from->result) {
+            to->result_cdr = &to->result;
+        } else {
+            to->result_cdr = from->result_cdr;
+        }
     }
-    
-    if (n == 0)
-	BIF_RET(NIL);
-    if (m == 0)
-	BIF_RET(A);
-    
-    /* allocate element vector */
-    if (n <= SMALL_VEC_SIZE)
-	vec_p = small_vec;
-    else
-	vec_p = (Eterm*) erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm));
-    
-    /* PUT ALL ELEMENTS IN VP */
-    vp = vec_p;
-    list = A;
-    i = n;
-    while(i--) {
-	Eterm* listp = list_val(list);
-	*vp++ = CAR(listp);
-	list = CDR(listp);
+}
+
+static Eterm subtract_create_trap_state(Process *p,
+                                        ErtsSubtractContext *context) {
+    Binary *state_bin;
+    Eterm *hp;
+
+    state_bin = erts_create_magic_binary(sizeof(ErtsSubtractContext),
+                                         subtract_ctx_bin_dtor);
+
+    subtract_ctx_move(context, ERTS_MAGIC_BIN_DATA(state_bin));
+
+    hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+
+    return erts_mk_magic_ref(&hp, &MSO(p), state_bin);
+}
+
+static int subtract_enter_len_lhs(Process *p, ErtsSubtractContext *context) {
+    context->stage = SUBTRACT_STAGE_LEN_LHS;
+
+    context->iterator = context->lhs_original;
+    context->lhs_remaining = 0;
+
+    return subtract_continue(p, context);
+}
+
+static int subtract_enter_len_rhs(Process *p, ErtsSubtractContext *context) {
+    context->stage = SUBTRACT_STAGE_LEN_RHS;
+
+    context->iterator = context->rhs_original;
+    context->rhs_remaining = 0;
+
+    return subtract_continue(p, context);
+}
+
+static int subtract_get_length(Process *p, Eterm *iterator_p, Uint *count_p) {
+    static const Sint ELEMENTS_PER_RED = 32;
+
+    Sint budget, count;
+    Eterm iterator;
+
+    budget = ELEMENTS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    iterator = *iterator_p;
+
+#ifdef DEBUG
+    budget = budget / 10 + 1;
+#endif
+
+    for (count = 0; count < budget && is_list(iterator); count++) {
+        iterator = CDR(list_val(iterator));
     }
-    
-    /* UNMARK ALL DELETED CELLS */
-    list = B;
-    m = 0;  /* number of deleted elements */
-    while(is_list(list)) {
-	Eterm* listp = list_val(list);
-	Eterm  elem = CAR(listp);
-	i = n;
-	vp = vec_p;
-	while(i--) {
-	    if (is_value(*vp) && eq(*vp, elem)) {
-		*vp = THE_NON_VALUE;
-		m++;
-		break;
-	    }
-	    vp++;
-	}
-	list = CDR(listp);
+
+    if (!is_list(iterator) && !is_nil(iterator)) {
+        return -1;
     }
-    
-    if (m == n)      /* All deleted ? */
-	res = NIL;
-    else if (m == 0)  /* None deleted ? */
-	res = A;
-    else {			/* REBUILD LIST */
-	res = NIL;
-	need = 2*(n - m);
-	hp = HAlloc(p, need);
-	vp = vec_p + n - 1;
-	while(vp >= vec_p) {
-	    if (is_value(*vp)) {
-		res = CONS(hp, *vp, res);
-		hp += 2;
-	    }
-	    vp--;
-	}
+
+    BUMP_REDS(p, count / ELEMENTS_PER_RED);
+
+    *iterator_p = iterator;
+    *count_p += count;
+
+    if (is_nil(iterator)) {
+        return 1;
     }
-    if (vec_p != small_vec)
-	erts_free(ERTS_ALC_T_TMP, (void *) vec_p);
-    BIF_RET(res);
+
+    return 0;
 }
 
-BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2)
-{
-    return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2);
+static int subtract_enter_naive_lhs(Process *p, ErtsSubtractContext *context) {
+    Eterm iterator;
+    int i = 0;
+
+    context->stage = SUBTRACT_STAGE_NAIVE_LHS;
+
+    context->iterator = context->rhs_original;
+    context->result = NIL;
+
+    iterator = context->lhs_original;
+
+    while (is_list(iterator)) {
+        const Eterm *cell = list_val(iterator);
+
+        ASSERT(i < SUBTRACT_LHS_THRESHOLD);
+
+        context->u.lhs_elements[i++] = CAR(cell);
+        iterator = CDR(cell);
+    }
+
+    ASSERT(i == context->lhs_remaining);
+
+    return subtract_continue(p, context);
 }
 
-BIF_RETTYPE subtract_2(BIF_ALIST_2)
-{
-    return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2);
+static int subtract_naive_lhs(Process *p, ErtsSubtractContext *context) {
+    const Sint CHECKS_PER_RED = 16;
+    Sint checks, budget;
+
+    budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    checks = 0;
+
+    while (checks < budget && is_list(context->iterator)) {
+        const Eterm *cell;
+        Eterm value, next;
+        int found_at;
+
+        cell = list_val(context->iterator);
+
+        value = CAR(cell);
+        next = CDR(cell);
+
+        for (found_at = 0; found_at < context->lhs_remaining; found_at++) {
+            if (EQ(value, context->u.lhs_elements[found_at])) {
+                /* We shift the array one step down as we have to preserve
+                 * order.
+                 *
+                 * Note that we can't exit early as that would suppress errors
+                 * in the right-hand side (this runs prior to determining the
+                 * length of RHS). */
+
+                context->lhs_remaining--;
+                sys_memmove(&context->u.lhs_elements[found_at],
+                            &context->u.lhs_elements[found_at + 1],
+                            (context->lhs_remaining - found_at) * sizeof(Eterm));
+                break;
+            }
+        }
+
+        checks += MAX(1, context->lhs_remaining);
+        context->iterator = next;
+    }
+
+    BUMP_REDS(p, MIN(checks, budget) / CHECKS_PER_RED);
+
+    if (is_list(context->iterator)) {
+        return 0;
+    } else if (!is_nil(context->iterator)) {
+        return -1;
+    }
+
+    if (context->lhs_remaining > 0) {
+        Eterm *hp;
+        int i;
+
+        hp = HAlloc(p, context->lhs_remaining * 2);
+
+        for (i = context->lhs_remaining - 1; i >= 0; i--) {
+            Eterm value = context->u.lhs_elements[i];
+
+            context->result = CONS(hp, value, context->result);
+            hp += 2;
+        }
+    }
+
+    ASSERT(context->lhs_remaining > 0 || context->result == NIL);
+
+    return 1;
+}
+
+static int subtract_enter_naive_rhs(Process *p, ErtsSubtractContext *context) {
+    Eterm iterator;
+    int i = 0;
+
+    context->stage = SUBTRACT_STAGE_NAIVE_RHS;
+
+    context->iterator = context->lhs_original;
+    context->result_cdr = &context->result;
+    context->result = NIL;
+
+    iterator = context->rhs_original;
+
+    while (is_list(iterator)) {
+        const Eterm *cell = list_val(iterator);
+
+        ASSERT(i < SUBTRACT_RHS_THRESHOLD);
+
+        context->u.rhs_elements[i++] = CAR(cell);
+        iterator = CDR(cell);
+    }
+
+    ASSERT(i == context->rhs_remaining);
+
+    return subtract_continue(p, context);
+}
+
+static int subtract_naive_rhs(Process *p, ErtsSubtractContext *context) {
+    const Sint CHECKS_PER_RED = 16;
+    Sint checks, budget;
+
+    budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    checks = 0;
+
+#ifdef DEBUG
+    budget = budget / 10 + 1;
+#endif
+
+    while (checks < budget && is_list(context->iterator)) {
+        const Eterm *cell;
+        Eterm value, next;
+        int found_at;
+
+        cell = list_val(context->iterator);
+        value = CAR(cell);
+        next = CDR(cell);
+
+        for (found_at = context->rhs_remaining - 1; found_at >= 0; found_at--) {
+            if (EQ(value, context->u.rhs_elements[found_at])) {
+                break;
+            }
+        }
+
+        if (found_at < 0) {
+            /* Destructively add the value to the result. This is safe
+             * since the GC is disabled and the unfinished term is never
+             * leaked to the outside world. */
+            Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2);
+
+            *context->result_cdr = make_list(hp);
+            context->result_cdr = &CDR(hp);
+
+            CAR(hp) = value;
+        } else if (found_at >= 0) {
+            Eterm swap;
+
+            if (context->rhs_remaining-- == 1) {
+                /* We've run out of items to remove, so the rest of the
+                 * result will be equal to the remainder of the input. We know
+                 * that LHS is well-formed as any errors would've been reported
+                 * during length determination. */
+                *context->result_cdr = next;
+
+                BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED);
+
+                return 1;
+            }
+
+            swap = context->u.rhs_elements[context->rhs_remaining];
+            context->u.rhs_elements[found_at] = swap;
+        }
+
+        checks += context->rhs_remaining;
+        context->iterator = next;
+        context->lhs_remaining--;
+    }
+
+    /* The result only has to be terminated when returning it to the user, but
+     * we're doing it when trapping as well to prevent headaches when
+     * debugging. */
+    *context->result_cdr = NIL;
+
+    BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED);
+
+    if (is_list(context->iterator)) {
+        ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0);
+        return 0;
+    }
+
+    return 1;
 }
 
+static int subtract_enter_set_build(Process *p, ErtsSubtractContext *context) {
+    context->stage = SUBTRACT_STAGE_SET_BUILD;
+
+    context->u.rhs_set.alloc_start =
+        erts_alloc(ERTS_ALC_T_LIST_TRAP,
+                   context->rhs_remaining * sizeof(subtract_tree_t));
+
+    context->u.rhs_set.alloc = context->u.rhs_set.alloc_start;
+    context->u.rhs_set.tree = NULL;
+
+    context->iterator = context->rhs_original;
+
+    return subtract_continue(p, context);
+}
+
+static int subtract_set_build(Process *p, ErtsSubtractContext *context) {
+    const static Sint INSERTIONS_PER_RED = 16;
+    Sint budget, insertions;
+
+    budget = INSERTIONS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    insertions = 0;
+
+#ifdef DEBUG
+    budget = budget / 10 + 1;
+#endif
+
+    while (insertions < budget && is_list(context->iterator)) {
+        subtract_tree_t *existing_node, *new_node;
+        const Eterm *cell;
+        Eterm value, next;
+
+        cell = list_val(context->iterator);
+        value = CAR(cell);
+        next = CDR(cell);
+
+        new_node = context->u.rhs_set.alloc;
+        new_node->key = value;
+        new_node->count = 1;
+
+        existing_node = subtract_rbt_lookup_insert(&context->u.rhs_set.tree,
+                                                   new_node);
+
+        if (existing_node != NULL) {
+            existing_node->count++;
+        } else {
+            context->u.rhs_set.alloc++;
+        }
+
+        context->iterator = next;
+        insertions++;
+    }
+
+    BUMP_REDS(p, insertions / INSERTIONS_PER_RED);
+
+    ASSERT(is_list(context->iterator) || is_nil(context->iterator));
+    ASSERT(context->u.rhs_set.tree != NULL);
+
+    return is_nil(context->iterator);
+}
+
+static int subtract_enter_set_finish(Process *p, ErtsSubtractContext *context) {
+    context->stage = SUBTRACT_STAGE_SET_FINISH;
+
+    context->result_cdr = &context->result;
+    context->result = NIL;
+
+    context->iterator = context->lhs_original;
+
+    return subtract_continue(p, context);
+}
+
+static int subtract_set_finish(Process *p, ErtsSubtractContext *context) {
+    const Sint CHECKS_PER_RED = 8;
+    Sint checks, budget;
+
+    budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    checks = 0;
+
+#ifdef DEBUG
+    budget = budget / 10 + 1;
+#endif
+
+    while (checks < budget && is_list(context->iterator)) {
+        subtract_tree_t *node;
+        const Eterm *cell;
+        Eterm value, next;
+
+        cell = list_val(context->iterator);
+        value = CAR(cell);
+        next = CDR(cell);
+
+        ASSERT(context->rhs_remaining > 0);
+
+        node = subtract_rbt_lookup(context->u.rhs_set.tree, value);
+
+        if (node == NULL) {
+            Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2);
+
+            *context->result_cdr = make_list(hp);
+            context->result_cdr = &CDR(hp);
+
+            CAR(hp) = value;
+        } else {
+            if (context->rhs_remaining-- == 1) {
+                *context->result_cdr = next;
+
+                BUMP_REDS(p, checks / CHECKS_PER_RED);
+
+                return 1;
+            }
+
+            if (node->count-- == 1) {
+                subtract_rbt_delete(&context->u.rhs_set.tree, node);
+            }
+        }
+
+        context->iterator = next;
+        context->lhs_remaining--;
+        checks++;
+    }
+
+    *context->result_cdr = NIL;
+
+    BUMP_REDS(p, checks / CHECKS_PER_RED);
+
+    if (is_list(context->iterator)) {
+        ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0);
+        return 0;
+    }
+
+    return 1;
+}
+
+static int subtract_continue(Process *p, ErtsSubtractContext *context) {
+    switch (context->stage) {
+        case SUBTRACT_STAGE_START: {
+            return subtract_enter_len_lhs(p, context);
+        }
+
+        case SUBTRACT_STAGE_LEN_LHS: {
+            int res = subtract_get_length(p,
+                                          &context->iterator,
+                                          &context->lhs_remaining);
+
+            if (res != 1) {
+                return res;
+            }
+
+            if (context->lhs_remaining <= SUBTRACT_LHS_THRESHOLD) {
+                return subtract_enter_naive_lhs(p, context);
+            }
+
+            return subtract_enter_len_rhs(p, context);
+        }
+
+        case SUBTRACT_STAGE_NAIVE_LHS: {
+            return subtract_naive_lhs(p, context);
+        }
+
+        case SUBTRACT_STAGE_LEN_RHS: {
+            int res = subtract_get_length(p,
+                                          &context->iterator,
+                                          &context->rhs_remaining);
+
+            if (res != 1) {
+                return res;
+            }
+
+            /* We've walked through both lists fully now so we no longer need
+             * to check for errors past this point. */
+
+            if (context->rhs_remaining <= SUBTRACT_RHS_THRESHOLD) {
+                return subtract_enter_naive_rhs(p, context);
+            }
+
+            return subtract_enter_set_build(p, context);
+        }
+
+        case SUBTRACT_STAGE_NAIVE_RHS: {
+            return subtract_naive_rhs(p, context);
+        }
+
+        case SUBTRACT_STAGE_SET_BUILD: {
+            int res = subtract_set_build(p, context);
+
+            if (res != 1) {
+                return res;
+            }
+
+            return subtract_enter_set_finish(p, context);
+        }
+
+        case SUBTRACT_STAGE_SET_FINISH: {
+            return subtract_set_finish(p, context);
+        }
+
+        default:
+            ERTS_ASSERT(!"unreachable");
+    }
+}
+
+static int subtract_start(Process *p, Eterm lhs, Eterm rhs,
+                          ErtsSubtractContext *context) {
+    context->stage = SUBTRACT_STAGE_START;
+
+    context->lhs_original = lhs;
+    context->rhs_original = rhs;
+
+    return subtract_continue(p, context);
+}
+
+/* erlang:'--'/2 */
+static Eterm subtract(Export *bif_entry, BIF_ALIST_2) {
+    Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2;
+
+    if ((is_list(lhs) || is_nil(lhs)) && (is_list(rhs) || is_nil(rhs))) {
+        /* We start with the context on the stack in the hopes that we won't
+         * have to trap. */
+        ErtsSubtractContext context;
+        int res;
+
+        res = subtract_start(BIF_P, lhs, rhs, &context);
+
+        if (res == 0) {
+            Eterm state_mref;
+
+            state_mref = subtract_create_trap_state(BIF_P, &context);
+            erts_set_gc_state(BIF_P, 0);
+
+            BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL);
+        }
+
+        subtract_ctx_dtor(&context);
+
+        if (res < 0) {
+            BIF_ERROR(BIF_P, BADARG);
+        }
+
+        BIF_RET(context.result);
+    } else if (is_internal_magic_ref(lhs)) {
+        ErtsSubtractContext *context;
+        int (*dtor)(Binary*);
+        Binary *magic_bin;
+
+        int res;
+
+        magic_bin = erts_magic_ref2bin(lhs);
+        dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin);
+
+        if (dtor != subtract_ctx_bin_dtor) {
+            BIF_ERROR(BIF_P, BADARG);
+        }
+
+        ASSERT(BIF_P->flags & F_DISABLE_GC);
+        ASSERT(rhs == NIL);
+
+        context = ERTS_MAGIC_BIN_DATA(magic_bin);
+        res = subtract_continue(BIF_P, context);
+
+        if (res == 0) {
+            BIF_TRAP2(bif_entry, BIF_P, lhs, NIL);
+        }
+
+        erts_set_gc_state(BIF_P, 1);
+
+        if (res < 0) {
+            ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry,
+                                    context->lhs_original,
+                                    context->rhs_original);
+        }
+
+        BIF_RET(context->result);
+    }
+
+    ASSERT(!(BIF_P->flags & F_DISABLE_GC));
+
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) {
+    return subtract(bif_export[BIF_ebif_minusminus_2], BIF_CALL_ARGS);
+}
+
+BIF_RETTYPE subtract_2(BIF_ALIST_2) {
+    return subtract(bif_export[BIF_subtract_2], BIF_CALL_ARGS);
+}
+
+
 BIF_RETTYPE lists_member_2(BIF_ALIST_2)
 {
     Eterm term;
@@ -283,7 +905,7 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
 {
     static const Uint CELLS_PER_RED = 40;
 
-    Eterm *heap_top, *heap_end;
+    Eterm *alloc_top, *alloc_end;
     Uint cells_left, max_cells;
     Eterm list, tail;
     Eterm lookahead;
@@ -305,18 +927,18 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
         BIF_ERROR(c_p, BADARG);
     }
 
-    heap_top = HAlloc(c_p, 2 * (max_cells - cells_left));
-    heap_end = heap_top + 2 * (max_cells - cells_left);
+    alloc_top = HAlloc(c_p, 2 * (max_cells - cells_left));
+    alloc_end = alloc_top + 2 * (max_cells - cells_left);
 
-    while (heap_top < heap_end) {
+    while (alloc_top < alloc_end) {
         Eterm *pair = list_val(list);
 
-        tail = CONS(heap_top, CAR(pair), tail);
+        tail = CONS(alloc_top, CAR(pair), tail);
         list = CDR(pair);
 
         ASSERT(is_list(list) || is_nil(list));
 
-        heap_top += 2;
+        alloc_top += 2;
     }
 
     if (is_nil(list)) {
@@ -333,7 +955,7 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p,
 {
     static const Uint CELLS_PER_RED = 60;
 
-    Eterm *heap_top, *heap_end;
+    Eterm *alloc_start, *alloc_top, *alloc_end;
     Uint cells_left, max_cells;
     Eterm list, tail;
 
@@ -343,21 +965,27 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p,
     cells_left = max_cells = CELLS_PER_RED * (1 + ERTS_BIF_REDS_LEFT(c_p));
 
     ASSERT(HEAP_LIMIT(c_p) >= HEAP_TOP(c_p) + 2);
-    heap_end = HEAP_LIMIT(c_p) - 2;
-    heap_top = HEAP_TOP(c_p);
+    alloc_start = HEAP_TOP(c_p);
+    alloc_end = HEAP_LIMIT(c_p) - 2;
+    alloc_top = alloc_start;
+
+    /* Don't process more cells than we have reductions for. */
+    alloc_end = MIN(alloc_top + (cells_left * 2), alloc_end);
 
-    while (heap_top < heap_end && is_list(list)) {
+    while (alloc_top < alloc_end && is_list(list)) {
         Eterm *pair = list_val(list);
 
-        tail = CONS(heap_top, CAR(pair), tail);
+        tail = CONS(alloc_top, CAR(pair), tail);
         list = CDR(pair);
 
-        heap_top += 2;
+        alloc_top += 2;
     }
 
-    cells_left -= (heap_top - heap_end) / 2;
+    cells_left -= (alloc_top - alloc_start) / 2;
+    HEAP_TOP(c_p) = alloc_top;
+
+    ASSERT(cells_left >= 0 && cells_left <= max_cells);
     BUMP_REDS(c_p, (max_cells - cells_left) / CELLS_PER_RED);
-    HEAP_TOP(c_p) = heap_top;
 
     if (is_nil(list)) {
         BIF_RET(tail);
diff --git a/erts/emulator/beam/erl_bif_persistent.c b/erts/emulator/beam/erl_bif_persistent.c
new file mode 100644
index 0000000000..9dca768a18
--- /dev/null
+++ b/erts/emulator/beam/erl_bif_persistent.c
@@ -0,0 +1,983 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2018. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Purpose: Implement persistent term storage.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "sys.h"
+#include "erl_vm.h"
+#include "global.h"
+#include "erl_process.h"
+#include "error.h"
+#include "erl_driver.h"
+#include "bif.h"
+#include "erl_map.h"
+#include "erl_binary.h"
+
+/*
+ * The limit for the number of persistent terms before
+ * a warning is issued.
+ */
+
+#define WARNING_LIMIT 20000
+#define XSTR(s) STR(s)
+#define STR(s) #s
+
+/*
+ * Parameters for the hash table.
+ */
+#define INITIAL_SIZE 8
+#define LOAD_FACTOR ((Uint)50)
+#define MUST_GROW(t) (((Uint)100) * t->num_entries >= LOAD_FACTOR * t->allocated)
+#define MUST_SHRINK(t) (((Uint)200) * t->num_entries <= LOAD_FACTOR * t->allocated && \
+                        t->allocated > INITIAL_SIZE)
+
+typedef struct hash_table {
+    Uint allocated;
+    Uint num_entries;
+    Uint mask;
+    Uint first_to_delete;
+    Uint num_to_delete;
+    erts_atomic_t refc;
+    struct hash_table* delete_next;
+    ErtsThrPrgrLaterOp thr_prog_op;
+    Eterm term[1];
+} HashTable;
+
+typedef struct trap_data {
+    HashTable* table;
+    Uint idx;
+    Uint remaining;
+    Uint memory;    /* Used by info/0 to count used memory */
+} TrapData;
+
+/*
+ * Declarations of local functions.
+ */
+
+static HashTable* create_initial_table(void);
+static Uint lookup(HashTable* hash_table, Eterm key);
+static HashTable* copy_table(HashTable* old_table, Uint new_size, int rehash);
+static HashTable* tmp_table_copy(HashTable* old_table);
+static int try_seize_update_permission(Process* c_p);
+static void release_update_permission(int release_updater);
+static void table_updater(void* table);
+static void table_deleter(void* hash_table);
+static void dec_table_refc(Process* c_p, HashTable* old_table);
+static void delete_table(Process* c_p, HashTable* table);
+static void mark_for_deletion(HashTable* hash_table, Uint entry_index);
+static ErtsLiteralArea* term_to_area(Eterm tuple);
+static void suspend_updater(Process* c_p);
+static Eterm do_get_all(Process* c_p, TrapData* trap_data, Eterm res);
+static Eterm do_info(Process* c_p, TrapData* trap_data);
+static void append_to_delete_queue(HashTable* table);
+static HashTable* next_to_delete(void);
+static Eterm alloc_trap_data(Process* c_p);
+static int cleanup_trap_data(Binary *bp);
+
+/*
+ * Traps
+ */
+
+static Export persistent_term_get_all_export;
+static BIF_RETTYPE persistent_term_get_all_trap(BIF_ALIST_2);
+static Export persistent_term_info_export;
+static BIF_RETTYPE persistent_term_info_trap(BIF_ALIST_1);
+
+/*
+ * Pointer to the current hash table.
+ */
+
+static erts_atomic_t the_hash_table;
+
+/*
+ * Queue of processes waiting to update the hash table.
+ */
+
+struct update_queue_item {
+    Process *p;
+    struct update_queue_item* next;
+};
+
+static erts_mtx_t update_table_permission_mtx;
+static struct update_queue_item* update_queue = NULL;
+static Process* updater_process = NULL;
+
+/* Protected by update_table_permission_mtx */
+static ErtsThrPrgrLaterOp thr_prog_op;
+static int issued_warning = 0;
+
+/*
+ * Queue of hash tables to be deleted.
+ */
+
+static erts_mtx_t delete_queue_mtx;
+static HashTable* delete_queue_head = NULL;
+static HashTable** delete_queue_tail = &delete_queue_head;
+
+/*
+ * The following variables are only used during crash dumping. They
+ * are intialized by erts_init_persistent_dumping().
+ */
+
+ErtsLiteralArea** erts_persistent_areas;
+Uint erts_num_persistent_areas;
+
+void erts_init_bif_persistent_term(void)
+{
+    HashTable* hash_table;
+
+    /*
+     * Initialize the mutex protecting updates.
+     */
+
+    erts_mtx_init(&update_table_permission_mtx,
+                  "update_persistent_term_permission",
+                  NIL,
+                  ERTS_LOCK_FLAGS_PROPERTY_STATIC |
+                  ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
+
+    /*
+     * Initialize delete queue.
+     */
+
+    erts_mtx_init(&delete_queue_mtx,
+                  "persistent_term_delete_permission",
+                  NIL,
+                  ERTS_LOCK_FLAGS_PROPERTY_STATIC |
+                  ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
+
+    /*
+     * Allocate a small initial hash table.
+     */
+
+    hash_table = create_initial_table();
+    erts_atomic_init_nob(&the_hash_table, (erts_aint_t)hash_table);
+
+    /*
+     * Initialize export entry for traps
+     */
+
+    erts_init_trap_export(&persistent_term_get_all_export,
+			  am_persistent_term, am_get_all_trap, 2,
+			  &persistent_term_get_all_trap);
+    erts_init_trap_export(&persistent_term_info_export,
+			  am_persistent_term, am_info_trap, 1,
+			  &persistent_term_info_trap);
+}
+
+BIF_RETTYPE persistent_term_put_2(BIF_ALIST_2)
+{
+    Eterm key;
+    Eterm term;
+    Eterm heap[3];
+    Eterm tuple;
+    HashTable* hash_table;
+    Uint term_size;
+    Uint lit_area_size;
+    ErlOffHeap code_off_heap;
+    ErtsLiteralArea* literal_area;
+    erts_shcopy_t info;
+    Eterm* ptr;
+    Uint entry_index;
+
+    if (!try_seize_update_permission(BIF_P)) {
+	ERTS_BIF_YIELD2(bif_export[BIF_persistent_term_put_2],
+                        BIF_P, BIF_ARG_1, BIF_ARG_2);
+    }
+
+    hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+
+    key = BIF_ARG_1;
+    term = BIF_ARG_2;
+
+    entry_index = lookup(hash_table, key);
+
+    heap[0] = make_arityval(2);
+    heap[1] = key;
+    heap[2] = term;
+    tuple = make_tuple(heap);
+
+    if (is_nil(hash_table->term[entry_index])) {
+        Uint size = hash_table->allocated;
+        if (MUST_GROW(hash_table)) {
+            size *= 2;
+        }
+        hash_table = copy_table(hash_table, size, 0);
+        entry_index = lookup(hash_table, key);
+        hash_table->num_entries++;
+    } else {
+        Eterm tuple = hash_table->term[entry_index];
+        Eterm old_term;
+
+        ASSERT(is_tuple_arity(tuple, 2));
+        old_term = boxed_val(tuple)[2];
+        if (EQ(term, old_term)) {
+            /* Same value. No need to update anything. */
+            release_update_permission(0);
+            BIF_RET(am_ok);
+        } else {
+            /* Mark the old term for deletion. */
+            mark_for_deletion(hash_table, entry_index);
+            hash_table = copy_table(hash_table, hash_table->allocated, 0);
+        }
+    }
+
+    /*
+     * Preserve internal sharing in the term by using the
+     * sharing-preserving functions. However, literals must
+     * be copied in case the module holding them are unloaded.
+     */
+    INITIALIZE_SHCOPY(info);
+    info.copy_literals = 1;
+    term_size = copy_shared_calculate(tuple, &info);
+    ERTS_INIT_OFF_HEAP(&code_off_heap);
+    lit_area_size = ERTS_LITERAL_AREA_ALLOC_SIZE(term_size);
+    literal_area = erts_alloc(ERTS_ALC_T_LITERAL, lit_area_size);
+    ptr = &literal_area->start[0];
+    literal_area->end = ptr + term_size;
+    tuple = copy_shared_perform(tuple, term_size, &info, &ptr, &code_off_heap);
+    ASSERT(tuple_val(tuple) == literal_area->start);
+    literal_area->off_heap = code_off_heap.first;
+    DESTROY_SHCOPY(info);
+    erts_set_literal_tag(&tuple, literal_area->start, term_size);
+    hash_table->term[entry_index] = tuple;
+
+    erts_schedule_thr_prgr_later_op(table_updater, hash_table, &thr_prog_op);
+    suspend_updater(BIF_P);
+
+    /*
+     * Issue a warning once if the warning limit has been exceeded.
+     */
+
+    if (hash_table->num_entries > WARNING_LIMIT && issued_warning == 0) {
+        static char w[] =
+            "More than " XSTR(WARNING_LIMIT) " persistent terms "
+            "have been created.\n"
+            "It is recommended to avoid creating an excessive number of\n"
+            "persistent terms, as creation and deletion of persistent terms\n"
+            "will be slower as the number of persistent terms increases.\n";
+        issued_warning = 1;
+	erts_send_warning_to_logger_str(BIF_P->group_leader, w);
+    }
+
+    ERTS_BIF_YIELD_RETURN(BIF_P, am_ok);
+}
+
+BIF_RETTYPE persistent_term_get_0(BIF_ALIST_0)
+{
+    HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+    TrapData* trap_data;
+    Eterm res = NIL;
+    Eterm magic_ref;
+    Binary* mbp;
+
+    magic_ref = alloc_trap_data(BIF_P);
+    mbp = erts_magic_ref2bin(magic_ref);
+    trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+    trap_data->table = hash_table;
+    trap_data->idx = 0;
+    trap_data->remaining = hash_table->num_entries;
+    res = do_get_all(BIF_P, trap_data, res);
+    if (trap_data->remaining == 0) {
+        BUMP_REDS(BIF_P, hash_table->num_entries);
+        trap_data->table = NULL; /* Prevent refc decrement */
+        BIF_RET(res);
+    } else {
+        /*
+         * Increment the ref counter to prevent an update operation (by put/2
+         * or erase/1) to delete this hash table.
+         */
+        erts_atomic_inc_nob(&hash_table->refc);
+        BUMP_ALL_REDS(BIF_P);
+        BIF_TRAP2(&persistent_term_get_all_export, BIF_P, magic_ref, res);
+    }
+}
+
+BIF_RETTYPE persistent_term_get_1(BIF_ALIST_1)
+{
+    Eterm key = BIF_ARG_1;
+    HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+    Uint entry_index;
+    Eterm term;
+
+    entry_index = lookup(hash_table, key);
+    term = hash_table->term[entry_index];
+    if (is_boxed(term)) {
+        ASSERT(is_tuple_arity(term, 2));
+        BIF_RET(tuple_val(term)[2]);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+BIF_RETTYPE persistent_term_erase_1(BIF_ALIST_1)
+{
+    Eterm key = BIF_ARG_1;
+    HashTable* old_table;
+    HashTable* new_table;
+    Uint entry_index;
+    Eterm old_term;
+
+    if (!try_seize_update_permission(BIF_P)) {
+	ERTS_BIF_YIELD1(bif_export[BIF_persistent_term_erase_1],
+                        BIF_P, BIF_ARG_1);
+    }
+
+    old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+    entry_index = lookup(old_table, key);
+    old_term = old_table->term[entry_index];
+    if (is_boxed(old_term)) {
+        Uint new_size;
+        HashTable* tmp_table;
+
+        /*
+         * Since we don't use any delete markers, we must rehash
+         * the table when deleting terms to ensure that all terms
+         * can still be reached if there are hash collisions.
+         * We can't rehash in place and it would not be safe to modify
+         * the old table yet, so we will first need a new
+         * temporary table copy of the same size as the old one.
+         */
+
+        ASSERT(is_tuple_arity(old_term, 2));
+        tmp_table = tmp_table_copy(old_table);
+
+        /*
+         * Delete the term from the temporary table. Then copy the
+         * temporary table to a new table, rehashing the entries
+         * while copying.
+         */
+
+        tmp_table->term[entry_index] = NIL;
+        tmp_table->num_entries--;
+        new_size = tmp_table->allocated;
+        if (MUST_SHRINK(tmp_table)) {
+            new_size /= 2;
+        }
+        new_table = copy_table(tmp_table, new_size, 1);
+        erts_free(ERTS_ALC_T_TMP, tmp_table);
+
+        mark_for_deletion(old_table, entry_index);
+        erts_schedule_thr_prgr_later_op(table_updater, new_table, &thr_prog_op);
+        suspend_updater(BIF_P);
+        ERTS_BIF_YIELD_RETURN(BIF_P, am_true);
+    }
+
+    /*
+     * Key is not present. Nothing to do.
+     */
+
+    ASSERT(is_nil(old_term));
+    release_update_permission(0);
+    BIF_RET(am_false);
+}
+
+BIF_RETTYPE erts_internal_erase_persistent_terms_0(BIF_ALIST_0)
+{
+    HashTable* old_table;
+    HashTable* new_table;
+
+    if (!try_seize_update_permission(BIF_P)) {
+	ERTS_BIF_YIELD0(bif_export[BIF_erts_internal_erase_persistent_terms_0],
+                        BIF_P);
+    }
+    old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+    old_table->first_to_delete = 0;
+    old_table->num_to_delete = old_table->allocated;
+    new_table = create_initial_table();
+    erts_schedule_thr_prgr_later_op(table_updater, new_table, &thr_prog_op);
+    suspend_updater(BIF_P);
+    ERTS_BIF_YIELD_RETURN(BIF_P, am_true);
+}
+
+BIF_RETTYPE persistent_term_info_0(BIF_ALIST_0)
+{
+    HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+    TrapData* trap_data;
+    Eterm res = NIL;
+    Eterm magic_ref;
+    Binary* mbp;
+
+    magic_ref = alloc_trap_data(BIF_P);
+    mbp = erts_magic_ref2bin(magic_ref);
+    trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+    trap_data->table = hash_table;
+    trap_data->idx = 0;
+    trap_data->remaining = hash_table->num_entries;
+    trap_data->memory = 0;
+    res = do_info(BIF_P, trap_data);
+    if (trap_data->remaining == 0) {
+        BUMP_REDS(BIF_P, hash_table->num_entries);
+        trap_data->table = NULL; /* Prevent refc decrement */
+        BIF_RET(res);
+    } else {
+        /*
+         * Increment the ref counter to prevent an update operation (by put/2
+         * or erase/1) to delete this hash table.
+         */
+        erts_atomic_inc_nob(&hash_table->refc);
+        BUMP_ALL_REDS(BIF_P);
+        BIF_TRAP2(&persistent_term_info_export, BIF_P, magic_ref, res);
+    }
+}
+
+Uint
+erts_persistent_term_count(void)
+{
+    HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+    return hash_table->num_entries;
+}
+
+void
+erts_init_persistent_dumping(void)
+{
+    HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+    ErtsLiteralArea** area_p;
+    Uint i;
+
+    /*
+     * Overwrite the array of Eterms in the current hash table
+     * with pointers to literal areas.
+     */
+
+    erts_persistent_areas = (ErtsLiteralArea **) hash_table->term;
+    erts_num_persistent_areas = hash_table->num_entries;
+    area_p = erts_persistent_areas;
+    for (i = 0; i < hash_table->allocated; i++) {
+        Eterm term = hash_table->term[i];
+
+        if (is_boxed(term)) {
+            *area_p++ = term_to_area(term);
+        }
+    }
+}
+
+/*
+ * Local functions.
+ */
+
+static HashTable*
+create_initial_table(void)
+{
+    HashTable* hash_table;
+    int i;
+
+    hash_table = (HashTable *) erts_alloc(ERTS_ALC_T_PERSISTENT_TERM,
+                                          sizeof(HashTable)+sizeof(Eterm) *
+                                          (INITIAL_SIZE-1));
+    hash_table->allocated = INITIAL_SIZE;
+    hash_table->num_entries = 0;
+    hash_table->mask = INITIAL_SIZE-1;
+    hash_table->first_to_delete = 0;
+    hash_table->num_to_delete = 0;
+    erts_atomic_init_nob(&hash_table->refc, (erts_aint_t)1);
+    for (i = 0; i < INITIAL_SIZE; i++) {
+        hash_table->term[i] = NIL;
+    }
+    return hash_table;
+}
+
+static BIF_RETTYPE
+persistent_term_get_all_trap(BIF_ALIST_2)
+{
+    TrapData* trap_data;
+    Eterm res = BIF_ARG_2;
+    Uint bump_reds;
+    Binary* mbp;
+
+    ASSERT(is_list(BIF_ARG_2));
+    mbp = erts_magic_ref2bin(BIF_ARG_1);
+    trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+    bump_reds = trap_data->remaining;
+    res = do_get_all(BIF_P, trap_data, res);
+    ASSERT(is_list(res));
+    if (trap_data->remaining > 0) {
+        BUMP_ALL_REDS(BIF_P);
+        BIF_TRAP2(&persistent_term_get_all_export, BIF_P, BIF_ARG_1, res);
+    } else {
+        /*
+         * Decrement ref count (and possibly delete the hash table
+         * and associated literal area).
+         */
+        dec_table_refc(BIF_P, trap_data->table);
+        trap_data->table = NULL; /* Prevent refc decrement */
+        BUMP_REDS(BIF_P, bump_reds);
+        BIF_RET(res);
+    }
+}
+
+static Eterm
+do_get_all(Process* c_p, TrapData* trap_data, Eterm res)
+{
+    HashTable* hash_table;
+    Uint remaining;
+    Uint idx;
+    Uint max_iter;
+    Uint i;
+    Eterm* hp;
+    Uint heap_size;
+    struct copy_term {
+        Uint key_size;
+        Eterm* tuple_ptr;
+    } *copy_data;
+
+    hash_table = trap_data->table;
+    idx = trap_data->idx;
+#if defined(DEBUG) || defined(VALGRIND)
+    max_iter = 50;
+#else
+    max_iter = ERTS_BIF_REDS_LEFT(c_p);
+#endif
+    remaining = trap_data->remaining < max_iter ?
+        trap_data->remaining : max_iter;
+    trap_data->remaining -= remaining;
+
+    copy_data = (struct copy_term *) erts_alloc(ERTS_ALC_T_TMP,
+                                                remaining *
+                                                sizeof(struct copy_term));
+    i = 0;
+    heap_size = (2 + 3) * remaining;
+    while (remaining != 0) {
+        Eterm term = hash_table->term[idx];
+        if (is_tuple(term)) {
+            Uint key_size;
+            Eterm* tup_val;
+
+            ASSERT(is_tuple_arity(term, 2));
+            tup_val = tuple_val(term);
+            key_size = size_object(tup_val[1]);
+            copy_data[i].key_size = key_size;
+            copy_data[i].tuple_ptr = tup_val;
+            heap_size += key_size;
+            i++;
+            remaining--;
+        }
+        idx++;
+    }
+    trap_data->idx = idx;
+
+    hp = HAlloc(c_p, heap_size);
+    remaining = i;
+    for (i = 0; i < remaining; i++) {
+        Eterm* tuple_ptr;
+        Uint key_size;
+        Eterm key;
+        Eterm tup;
+
+        tuple_ptr = copy_data[i].tuple_ptr;
+        key_size = copy_data[i].key_size;
+        key = copy_struct(tuple_ptr[1], key_size, &hp, &c_p->off_heap);
+        tup = TUPLE2(hp, key, tuple_ptr[2]);
+        hp += 3;
+        res = CONS(hp, tup, res);
+        hp += 2;
+    }
+    erts_free(ERTS_ALC_T_TMP, copy_data);
+    return res;
+}
+
+static BIF_RETTYPE
+persistent_term_info_trap(BIF_ALIST_1)
+{
+    TrapData* trap_data = (TrapData *) BIF_ARG_1;
+    Eterm res;
+    Uint bump_reds;
+    Binary* mbp;
+
+    mbp = erts_magic_ref2bin(BIF_ARG_1);
+    trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+    bump_reds = trap_data->remaining;
+    res = do_info(BIF_P, trap_data);
+    if (trap_data->remaining > 0) {
+        ASSERT(res == am_ok);
+        BUMP_ALL_REDS(BIF_P);
+        BIF_TRAP1(&persistent_term_info_export, BIF_P, BIF_ARG_1);
+    } else {
+        /*
+         * Decrement ref count (and possibly delete the hash table
+         * and associated literal area).
+         */
+        dec_table_refc(BIF_P, trap_data->table);
+        trap_data->table = NULL; /* Prevent refc decrement */
+        BUMP_REDS(BIF_P, bump_reds);
+        ASSERT(is_map(res));
+        BIF_RET(res);
+    }
+}
+
+#define DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1)
+
+static Eterm
+do_info(Process* c_p, TrapData* trap_data)
+{
+    HashTable* hash_table;
+    Uint remaining;
+    Uint idx;
+    Uint max_iter;
+
+    hash_table = trap_data->table;
+    idx = trap_data->idx;
+#if defined(DEBUG) || defined(VALGRIND)
+    max_iter = 50;
+#else
+    max_iter = ERTS_BIF_REDS_LEFT(c_p);
+#endif
+    remaining = trap_data->remaining < max_iter ? trap_data->remaining : max_iter;
+    trap_data->remaining -= remaining;
+    while (remaining != 0) {
+        if (is_boxed(hash_table->term[idx])) {
+            ErtsLiteralArea* area;
+            area = term_to_area(hash_table->term[idx]);
+            trap_data->memory += sizeof(ErtsLiteralArea) +
+                sizeof(Eterm) * (area->end - area->start - 1);
+            remaining--;
+        }
+        idx++;
+    }
+    trap_data->idx = idx;
+    if (trap_data->remaining > 0) {
+        return am_ok;           /* Dummy return value */
+    } else {
+        Eterm* hp;
+        Eterm count_term;
+        Eterm memory_term;
+        Eterm res;
+        Uint memory;
+        Uint hsz = MAP_SZ(2);
+
+        memory = sizeof(HashTable) + (trap_data->table->allocated-1) *
+            sizeof(Eterm) + trap_data->memory;
+        (void) erts_bld_uint(NULL, &hsz, hash_table->num_entries);
+        (void) erts_bld_uint(NULL, &hsz, memory);
+        hp = HAlloc(c_p, hsz);
+	count_term = erts_bld_uint(&hp, NULL, hash_table->num_entries);
+	memory_term = erts_bld_uint(&hp, NULL, memory);
+        res = MAP2(hp, am_count, count_term, am_memory, memory_term);
+        return res;
+    }
+}
+
+#undef DECL_AM
+
+static Eterm
+alloc_trap_data(Process* c_p)
+{
+    Binary* mbp = erts_create_magic_binary(sizeof(TrapData),
+                                           cleanup_trap_data);
+    Eterm* hp;
+
+    hp = HAlloc(c_p, ERTS_MAGIC_REF_THING_SIZE);
+    return erts_mk_magic_ref(&hp, &MSO(c_p), mbp);
+}
+
+static int
+cleanup_trap_data(Binary *bp)
+{
+    TrapData* trap_data = ERTS_MAGIC_BIN_DATA(bp);
+
+    if (trap_data->table) {
+        /*
+         * The process has been killed and is now exiting.
+         * Decrement the reference counter for the table.
+         */
+        dec_table_refc(NULL, trap_data->table);
+    }
+    return 1;
+}
+
+static Uint
+lookup(HashTable* hash_table, Eterm key)
+{
+    Uint mask = hash_table->mask;
+    Eterm* table = hash_table->term;
+    Uint32 idx = make_internal_hash(key, 0);
+    Eterm term;
+
+    do {
+        idx++;
+        term = table[idx & mask];
+    } while (is_boxed(term) && !EQ(key, (tuple_val(term))[1]));
+    return idx & mask;
+}
+
+static HashTable*
+tmp_table_copy(HashTable* old_table)
+{
+    Uint size = old_table->allocated;
+    HashTable* tmp_table;
+    Uint i;
+
+    tmp_table = (HashTable *) erts_alloc(ERTS_ALC_T_TMP,
+                                         sizeof(HashTable) +
+                                         sizeof(Eterm) * (size-1));
+    *tmp_table = *old_table;
+    for (i = 0; i < size; i++) {
+        tmp_table->term[i] = old_table->term[i];
+    }
+    return tmp_table;
+}
+
+static HashTable*
+copy_table(HashTable* old_table, Uint new_size, int rehash)
+{
+    HashTable* new_table;
+    Uint old_size = old_table->allocated;
+    Uint i;
+
+    new_table = (HashTable *) erts_alloc(ERTS_ALC_T_PERSISTENT_TERM,
+                                         sizeof(HashTable) +
+                                         sizeof(Eterm) * (new_size-1));
+    if (old_table->allocated == new_size && !rehash) {
+        /*
+         * Same size and no key deleted. Make an exact copy of the table.
+         */
+        *new_table = *old_table;
+        for (i = 0; i < new_size; i++) {
+            new_table->term[i] = old_table->term[i];
+        }
+    } else {
+        /*
+         * The size of the table has changed or an element has been
+         * deleted. Must rehash, by inserting all old terms into the
+         * new (empty) table.
+         */
+        new_table->allocated = new_size;
+        new_table->num_entries = old_table->num_entries;
+        new_table->mask = new_size - 1;
+        for (i = 0; i < new_size; i++) {
+            new_table->term[i] = NIL;
+        }
+        for (i = 0; i < old_size; i++) {
+            if (is_tuple(old_table->term[i])) {
+                Eterm key = tuple_val(old_table->term[i])[1];
+                Uint entry_index = lookup(new_table, key);
+                ASSERT(is_nil(new_table->term[entry_index]));
+                new_table->term[entry_index] = old_table->term[i];
+            }
+        }
+    }
+    new_table->first_to_delete = 0;
+    new_table->num_to_delete = 0;
+    erts_atomic_init_nob(&new_table->refc, (erts_aint_t)1);
+    return new_table;
+}
+
+static void
+mark_for_deletion(HashTable* hash_table, Uint entry_index)
+{
+    hash_table->first_to_delete = entry_index;
+    hash_table->num_to_delete = 1;
+}
+
+static ErtsLiteralArea*
+term_to_area(Eterm tuple)
+{
+    ASSERT(is_tuple_arity(tuple, 2));
+    return (ErtsLiteralArea *) (((char *) tuple_val(tuple)) -
+                                offsetof(ErtsLiteralArea, start));
+}
+
+static void
+table_updater(void* data)
+{
+    HashTable* old_table;
+    HashTable* new_table;
+
+    old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+    new_table = (HashTable *) data;
+    ASSERT(new_table->num_to_delete == 0);
+    erts_atomic_set_nob(&the_hash_table, (erts_aint_t)new_table);
+    append_to_delete_queue(old_table);
+    erts_schedule_thr_prgr_later_op(table_deleter,
+                                    old_table,
+                                    &old_table->thr_prog_op);
+    release_update_permission(1);
+}
+
+static void
+table_deleter(void* data)
+{
+    HashTable* old_table = (HashTable *) data;
+
+    dec_table_refc(NULL, old_table);
+}
+
+static void
+dec_table_refc(Process* c_p, HashTable* old_table)
+{
+    erts_aint_t refc = erts_atomic_dec_read_nob(&old_table->refc);
+
+    if (refc == 0) {
+        HashTable* to_delete;
+
+        while ((to_delete = next_to_delete()) != NULL) {
+            delete_table(c_p, to_delete);
+        }
+    }
+}
+
+static void
+delete_table(Process* c_p, HashTable* table)
+{
+    Uint idx = table->first_to_delete;
+    Uint n = table->num_to_delete;
+
+    /*
+     * There are no longer any references to this hash table.
+     *
+     * Any literals pointed for deletion can be queued for
+     * deletion and the table itself can be deallocated.
+     */
+
+#ifdef DEBUG
+    if (n == 1) {
+        ASSERT(is_tuple_arity(table->term[idx], 2));
+    }
+#endif
+
+    while (n > 0) {
+        Eterm term = table->term[idx];
+
+        if (is_tuple_arity(term, 2)) {
+            if (is_immed(tuple_val(term)[2])) {
+                erts_release_literal_area(term_to_area(term));
+            } else {
+                erts_queue_release_literals(c_p, term_to_area(term));
+            }
+        }
+        idx++, n--;
+    }
+    erts_free(ERTS_ALC_T_PERSISTENT_TERM, table);
+}
+
+/*
+ * Caller *must* yield if this function returns 0.
+ */
+
+static int
+try_seize_update_permission(Process* c_p)
+{
+    int success;
+
+    ASSERT(!erts_thr_progress_is_blocking()); /* to avoid deadlock */
+    ASSERT(c_p != NULL);
+
+    erts_mtx_lock(&update_table_permission_mtx);
+    ASSERT(updater_process != c_p);
+    success = (updater_process == NULL);
+    if (success) {
+        updater_process = c_p;
+    } else {
+        struct update_queue_item* qitem;
+        qitem = erts_alloc(ERTS_ALC_T_PERSISTENT_LOCK_Q, sizeof(*qitem));
+        qitem->p = c_p;
+        erts_proc_inc_refc(c_p);
+        qitem->next = update_queue;
+        update_queue = qitem;
+        erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL);
+    }
+    erts_mtx_unlock(&update_table_permission_mtx);
+    return success;
+}
+
+static void
+release_update_permission(int release_updater)
+{
+    erts_mtx_lock(&update_table_permission_mtx);
+    ASSERT(updater_process != NULL);
+
+    if (release_updater) {
+        erts_proc_lock(updater_process, ERTS_PROC_LOCK_STATUS);
+        if (!ERTS_PROC_IS_EXITING(updater_process)) {
+            erts_resume(updater_process, ERTS_PROC_LOCK_STATUS);
+        }
+        erts_proc_unlock(updater_process, ERTS_PROC_LOCK_STATUS);
+    }
+    updater_process = NULL;
+
+    while (update_queue != NULL) { /* Unleash the entire herd */
+	struct update_queue_item* qitem = update_queue;
+	erts_proc_lock(qitem->p, ERTS_PROC_LOCK_STATUS);
+	if (!ERTS_PROC_IS_EXITING(qitem->p)) {
+	    erts_resume(qitem->p, ERTS_PROC_LOCK_STATUS);
+	}
+	erts_proc_unlock(qitem->p, ERTS_PROC_LOCK_STATUS);
+	update_queue = qitem->next;
+	erts_proc_dec_refc(qitem->p);
+	erts_free(ERTS_ALC_T_PERSISTENT_LOCK_Q, qitem);
+    }
+    erts_mtx_unlock(&update_table_permission_mtx);
+}
+
+static void
+suspend_updater(Process* c_p)
+{
+#ifdef DEBUG
+    ASSERT(c_p != NULL);
+    erts_mtx_lock(&update_table_permission_mtx);
+    ASSERT(updater_process == c_p);
+    erts_mtx_unlock(&update_table_permission_mtx);
+#endif
+    erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL);
+}
+
+static void
+append_to_delete_queue(HashTable* table)
+{
+    erts_mtx_lock(&delete_queue_mtx);
+    table->delete_next = NULL;
+    *delete_queue_tail = table;
+    delete_queue_tail = &table->delete_next;
+    erts_mtx_unlock(&delete_queue_mtx);
+}
+
+static HashTable*
+next_to_delete(void)
+{
+    HashTable* table;
+
+    erts_mtx_lock(&delete_queue_mtx);
+    table = delete_queue_head;
+    if (table) {
+        if (erts_atomic_read_nob(&table->refc)) {
+            /*
+             * This hash table is still referenced. Hash tables
+             * must be deleted in order, so we return a NULL
+             * pointer.
+             */
+            table = NULL;
+        } else {
+            /*
+             * Remove the first hash table from the queue.
+             */
+            delete_queue_head = table->delete_next;
+            if (delete_queue_head == NULL) {
+                delete_queue_tail = &delete_queue_head;
+            }
+        }
+    }
+    erts_mtx_unlock(&delete_queue_mtx);
+    return table;
+}
diff --git a/erts/emulator/beam/erl_bif_unique.h b/erts/emulator/beam/erl_bif_unique.h
index 40b70667c0..944788c67c 100644
--- a/erts/emulator/beam/erl_bif_unique.h
+++ b/erts/emulator/beam/erl_bif_unique.h
@@ -242,11 +242,11 @@ erts_internal_ref_number_cmp(Uint32 num1[ERTS_REF_NUMBERS],
 			     Uint32 num2[ERTS_REF_NUMBERS])
 {
     if (num1[2] != num2[2])
-	return (int) ((Sint64) num1[2] - (Sint64) num2[2]);
+	return num1[2] > num2[2] ? 1 : -1;
     if (num1[1] != num2[1])
-	return (int) ((Sint64) num1[1] - (Sint64) num2[1]);
+	return num1[1] > num2[1] ? 1 : -1;
     if (num1[0] != num2[0])
-	return (int) ((Sint64) num1[0] - (Sint64) num2[0]);
+	return num1[0] > num2[0] ? 1 : -1;
     return 0;
 }
 
diff --git a/erts/emulator/beam/erl_dirty_bif.tab b/erts/emulator/beam/erl_dirty_bif.tab
index 086275fbe5..20299ff604 100644
--- a/erts/emulator/beam/erl_dirty_bif.tab
+++ b/erts/emulator/beam/erl_dirty_bif.tab
@@ -59,8 +59,6 @@ dirty-cpu erts_debug:lcnt_clear/0
 
 dirty-cpu-test erlang:'++'/2
 dirty-cpu-test erlang:append/2
-dirty-cpu-test erlang:'--'/2
-dirty-cpu-test erlang:subtract/2
 dirty-cpu-test erlang:iolist_size/1
 dirty-cpu-test erlang:make_tuple/2
 dirty-cpu-test erlang:make_tuple/3
diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c
index 47dd115c82..b4df418cd5 100644
--- a/erts/emulator/beam/erl_gc.c
+++ b/erts/emulator/beam/erl_gc.c
@@ -681,7 +681,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end,
     ErtsMonotonicTime start_time;
     ErtsSchedulerData *esdp = erts_proc_sched_data(p);
     erts_aint32_t state;
-    ERTS_MSACC_PUSH_STATE_M();
+    ERTS_MSACC_PUSH_STATE();
 #ifdef USE_VM_PROBES
     DTRACE_CHARBUF(pidbuf, DTRACE_TERM_BUF_SIZE);
 #endif
@@ -711,7 +711,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end,
     else if (p->live_hf_end != ERTS_INVALID_HFRAG_PTR)
 	live_hf_end = p->live_hf_end;
 
-    ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_GC);
+    ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_GC);
 
     erts_atomic32_read_bor_nob(&p->state, ERTS_PSFLG_GC);
     if (erts_system_monitor_long_gc != 0)
@@ -759,7 +759,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end,
         gc_trace_end_tag = am_gc_minor_end;
     } else {
 do_major_collection:
-        ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_GC_FULL);
+        ERTS_MSACC_SET_STATE_CACHED_X(ERTS_MSACC_STATE_GC_FULL);
         if (IS_TRACED_FL(p, F_TRACE_GC)) {
             trace_gc(p, am_gc_major_start, need, THE_NON_VALUE);
         }
@@ -770,7 +770,7 @@ do_major_collection:
             p->flags &= ~(F_DIRTY_MAJOR_GC|F_DIRTY_MINOR_GC);
         DTRACE2(gc_major_end, pidbuf, reclaimed_now);
         gc_trace_end_tag = am_gc_major_end;
-        ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_GC);
+        ERTS_MSACC_SET_STATE_CACHED_X(ERTS_MSACC_STATE_GC);
     }
 
     reset_active_writer(p);
@@ -800,7 +800,7 @@ do_major_collection:
 
         /* We have to make sure that we have space for need on the heap */
         res = delay_garbage_collection(p, live_hf_end, need, fcalls);
-        ERTS_MSACC_POP_STATE_M();
+        ERTS_MSACC_POP_STATE();
         return res;
     }
 
@@ -843,7 +843,7 @@ do_major_collection:
     FLAGS(p) &= ~(F_FORCE_GC|F_HIBERNATED);
     p->live_hf_end = ERTS_INVALID_HFRAG_PTR;
 
-    ERTS_MSACC_POP_STATE_M();
+    ERTS_MSACC_POP_STATE();
 
 #ifdef CHECK_FOR_HOLES
     /*
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index 57c6c10c7f..f687dcf335 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -354,6 +354,7 @@ erl_init(int ncpu,
     erts_init_bif();
     erts_init_bif_chksum();
     erts_init_bif_binary();
+    erts_init_bif_persistent_term();
     erts_init_bif_re();
     erts_init_unicode(); /* after RE to get access to PCRE unicode */
     erts_init_external();
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index 463ae898a3..1416c5f96c 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -97,6 +97,8 @@ static erts_lc_lock_order_t erts_lock_order[] = {
     {	"proc_btm",				"pid"			},
     {	"dist_entry",				"address"		},
     {	"dist_entry_links",			"address"		},
+    {   "update_persistent_term_permission",    NULL                    },
+    {   "persistent_term_delete_permission",    NULL                    },
     {   "code_write_permission",                NULL                    },
     {	"purge_state",		      		NULL			},
     {	"proc_status",				"pid"			},
diff --git a/erts/emulator/beam/erl_monitor_link.h b/erts/emulator/beam/erl_monitor_link.h
index 9ff8aa509a..ed7bf7d54a 100644
--- a/erts/emulator/beam/erl_monitor_link.h
+++ b/erts/emulator/beam/erl_monitor_link.h
@@ -1387,11 +1387,14 @@ ERTS_GLB_INLINE void
 erts_monitor_release(ErtsMonitor *mon)
 {
     ErtsMonitorData *mdp = erts_monitor_to_data(mon);
-    ERTS_ML_ASSERT(!(mon->flags & ERTS_ML_FLG_IN_TABLE));
     ERTS_ML_ASSERT(erts_atomic32_read_nob(&mdp->refc) > 0);
 
-    if (erts_atomic32_dec_read_nob(&mdp->refc) == 0)
+    if (erts_atomic32_dec_read_nob(&mdp->refc) == 0) {
+        ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE));
+        ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE));
+
         erts_monitor_destroy__(mdp);
+    }
 }
 
 ERTS_GLB_INLINE void
@@ -1399,12 +1402,14 @@ erts_monitor_release_both(ErtsMonitorData *mdp)
 {
     ERTS_ML_ASSERT((mdp->origin.flags & ERTS_ML_FLGS_SAME)
                    == (mdp->target.flags & ERTS_ML_FLGS_SAME));
-    ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE));
-    ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE));
     ERTS_ML_ASSERT(erts_atomic32_read_nob(&mdp->refc) >= 2);
 
-    if (erts_atomic32_add_read_nob(&mdp->refc, (erts_aint32_t) -2) == 0)
+    if (erts_atomic32_add_read_nob(&mdp->refc, (erts_aint32_t) -2) == 0) {
+        ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE));
+        ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE));
+
         erts_monitor_destroy__(mdp);
+    }
 }
 
 ERTS_GLB_INLINE int
diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c
index f4dc60941a..18ed782ae3 100644
--- a/erts/emulator/beam/erl_node_tables.c
+++ b/erts/emulator/beam/erl_node_tables.c
@@ -421,8 +421,25 @@ static void schedule_delete_dist_entry(DistEntry* dep)
      *
      * Note that timeouts do not guarantee thread progress.
      */
-    erts_schedule_thr_prgr_later_op(start_timer_delete_dist_entry,
-                                    dep, &dep->later_op);
+    ErtsSchedulerData *esdp = erts_get_scheduler_data();
+    if (esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)) {
+        erts_schedule_thr_prgr_later_op(start_timer_delete_dist_entry,
+                                        dep, &dep->later_op);
+    } else {
+        /*
+         * Since OTP 20, it's possible that destructor is executed on
+         *  a dirty scheduler. Aux work cannot be done on a dirty
+         *  scheduler, and scheduling any aux work on a dirty scheduler
+         *  makes the scheduler to loop infinitely.
+         * To avoid this, make a spot jump: schedule this function again
+         *  on a first normal scheduler. It is guaranteed to be always
+         *  online. Since it's a rare event, this shall not pose a big
+         *  utilisation hit.
+         */
+        erts_schedule_misc_aux_work(1,
+                                    (void (*)(void *))schedule_delete_dist_entry,
+                                    (void *) dep);
+    }
 }
 
 static void
diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c
index 243db4c734..ac5054ea10 100644
--- a/erts/emulator/beam/erl_process_dump.c
+++ b/erts/emulator/beam/erl_process_dump.c
@@ -58,6 +58,7 @@ static void dump_externally(fmtfn_t to, void *to_arg, Eterm term);
 static void mark_literal(Eterm* ptr);
 static void init_literal_areas(void);
 static void dump_literals(fmtfn_t to, void *to_arg);
+static void dump_persistent_terms(fmtfn_t to, void *to_arg);
 static void dump_module_literals(fmtfn_t to, void *to_arg,
                                  ErtsLiteralArea* lit_area);
 
@@ -74,6 +75,7 @@ erts_deep_process_dump(fmtfn_t to, void *to_arg)
 
     all_binaries = NULL;
     init_literal_areas();
+    erts_init_persistent_dumping();
 
     for (i = 0; i < max; i++) {
 	Process *p = erts_pix2proc(i);
@@ -93,6 +95,7 @@ erts_deep_process_dump(fmtfn_t to, void *to_arg)
        }
     }
 
+    dump_persistent_terms(to, to_arg);
     dump_literals(to, to_arg);
     dump_binaries(to, to_arg, all_binaries);
 }
@@ -775,6 +778,9 @@ init_literal_areas(void)
     qsort(lit_areas, num_lit_areas, sizeof(ErtsLiteralArea *),
           compare_areas);
 
+    qsort(erts_persistent_areas, erts_num_persistent_areas,
+          sizeof(ErtsLiteralArea *), compare_areas);
+
     erts_runlock_old_code(code_ix);
 }
 
@@ -796,6 +802,13 @@ static void mark_literal(Eterm* ptr)
 
     ap = bsearch(ptr, lit_areas, num_lit_areas, sizeof(ErtsLiteralArea*),
                  search_areas);
+    if (ap == 0) {
+        ap = bsearch(ptr, erts_persistent_areas,
+                     erts_num_persistent_areas,
+                     sizeof(ErtsLiteralArea*),
+                     search_areas);
+    }
+
 
     /*
      * If the literal was created by native code, this search will not
@@ -807,12 +820,12 @@ static void mark_literal(Eterm* ptr)
     }
 }
 
-
 static void
 dump_literals(fmtfn_t to, void *to_arg)
 {
     ErtsCodeIndex code_ix;
     int i;
+    Uint idx;
 
     code_ix = erts_active_code_ix();
     erts_rlock_old_code(code_ix);
@@ -825,6 +838,28 @@ dump_literals(fmtfn_t to, void *to_arg)
     }
 
     erts_runlock_old_code(code_ix);
+
+    for (idx = 0; idx < erts_num_persistent_areas; idx++) {
+        dump_module_literals(to, to_arg, erts_persistent_areas[idx]);
+    }
+}
+
+static void
+dump_persistent_terms(fmtfn_t to, void *to_arg)
+{
+    Uint idx;
+
+    erts_print(to, to_arg, "=persistent_terms\n");
+
+    for (idx = 0; idx < erts_num_persistent_areas; idx++) {
+        ErtsLiteralArea* ap = erts_persistent_areas[idx];
+        Eterm tuple = make_tuple(ap->start);
+        Eterm* tup_val = tuple_val(tuple);
+
+	dump_element(to, to_arg, tup_val[1]);
+        erts_putc(to, to_arg, '|');
+	dump_element_nl(to, to_arg, tup_val[2]);
+    }
 }
 
 static void
@@ -963,7 +998,8 @@ dump_module_literals(fmtfn_t to, void *to_arg, ErtsLiteralArea* lit_area)
                     }
                     erts_putc(to, to_arg, '\n');
                 }
-            } else if (is_export_header(w)) {
+            } else {
+                /* Dump everything else in the external format */
                 dump_externally(to, to_arg, term);
                 erts_putc(to, to_arg, '\n');
             }
diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h
index b3bfa69052..880febba8b 100644
--- a/erts/emulator/beam/erl_utils.h
+++ b/erts/emulator/beam/erl_utils.h
@@ -22,6 +22,7 @@
 #define ERL_UTILS_H__
 
 #include "sys.h"
+#include "atom.h"
 #include "erl_printf.h"
 
 struct process;
@@ -112,10 +113,12 @@ int eq(Eterm, Eterm);
 
 #define EQ(x,y) (((x) == (y)) || (is_not_both_immed((x),(y)) && eq((x),(y))))
 
-int erts_cmp_atoms(Eterm a, Eterm b);
-Sint erts_cmp(Eterm, Eterm, int, int);
-Sint erts_cmp_compound(Eterm, Eterm, int, int);
+ERTS_GLB_INLINE Sint erts_cmp(Eterm, Eterm, int, int);
+ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b);
+
 Sint cmp(Eterm a, Eterm b);
+Sint erts_cmp_compound(Eterm, Eterm, int, int);
+
 #define CMP(A,B)                         erts_cmp(A,B,0,0)
 #define CMP_TERM(A,B)                    erts_cmp(A,B,1,0)
 #define CMP_EQ_ONLY(A,B)                 erts_cmp(A,B,0,1)
@@ -150,4 +153,56 @@ Sint cmp(Eterm a, Eterm b);
 	if (erts_cmp_compound(X,Y,0,EqOnly) Op 0) { Action; };	\
     }
 
+#define erts_float_comp(x,y) (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1))
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) {
+    Atom *aa = atom_tab(atom_val(a));
+    Atom *bb = atom_tab(atom_val(b));
+
+    byte *name_a, *name_b;
+    int len_a, len_b, diff;
+
+    diff = aa->ord0 - bb->ord0;
+
+    if (diff != 0) {
+        return diff;
+    }
+
+    name_a = &aa->name[3];
+    name_b = &bb->name[3];
+    len_a = aa->len-3;
+    len_b = bb->len-3;
+
+    if (len_a > 0 && len_b > 0) {
+        diff = sys_memcmp(name_a, name_b, MIN(len_a, len_b));
+
+        if (diff != 0) {
+            return diff;
+        }
+    }
+
+    return len_a - len_b;
+}
+
+ERTS_GLB_INLINE Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only) {
+    if (is_atom(a) && is_atom(b)) {
+        return erts_cmp_atoms(a, b);
+    } else if (is_both_small(a, b)) {
+        return (signed_val(a) - signed_val(b));
+    } else if (is_float(a) && is_float(b)) {
+        FloatDef af, bf;
+
+        GET_DOUBLE(a, af);
+        GET_DOUBLE(b, bf);
+
+        return erts_float_comp(af.fd, bf.fd);
+    }
+
+    return erts_cmp_compound(a,b,exact,eq_only);
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
 #endif
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index 621ba108ba..9a66e491f3 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -1953,7 +1953,8 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla
 
 #define RETURN_STATE()							\
     do {								\
-	hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE+3);                    \
+	static const int TUPLE2_SIZE = 2 + 1;				\
+	hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE + TUPLE2_SIZE);        \
 	c_term = erts_mk_magic_ref(&hp, &MSO(p), context_b);            \
 	res = TUPLE2(hp, Term, c_term);					\
 	BUMP_ALL_REDS(p);                                               \
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index 21ae205237..0631404599 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -906,6 +906,8 @@ typedef struct ErtsLiteralArea_ {
     Eterm start[1]; /* beginning of area */
 } ErtsLiteralArea;
 
+void erts_queue_release_literals(Process *c_p, ErtsLiteralArea* literals);
+
 #define ERTS_LITERAL_AREA_ALLOC_SIZE(N) \
     (sizeof(ErtsLiteralArea) + sizeof(Eterm)*((N) - 1))
 
@@ -1001,6 +1003,7 @@ typedef struct {
     Uint literal_size;
     Eterm *lit_purge_ptr;
     Uint lit_purge_sz;
+    int copy_literals;
 } erts_shcopy_t;
 
 #define INITIALIZE_SHCOPY(info)						\
@@ -1010,6 +1013,7 @@ typedef struct {
 	info.bitstore_start = info.bitstore_default;			\
 	info.shtable_start = info.shtable_default;			\
 	info.literal_size = 0;						\
+	info.copy_literals = 0;						\
 	if (larea__) {							\
 	    info.lit_purge_ptr = &larea__->start[0];			\
 	    info.lit_purge_sz = larea__->end - info.lit_purge_ptr;	\
@@ -1238,6 +1242,13 @@ Sint erts_re_set_loop_limit(Sint limit);
 void erts_init_bif_binary(void);
 Sint erts_binary_set_loop_limit(Sint limit);
 
+/* erl_bif_persistent.c */
+void erts_init_bif_persistent_term(void);
+Uint erts_persistent_term_count(void);
+void erts_init_persistent_dumping(void);
+extern ErtsLiteralArea** erts_persistent_areas;
+extern Uint erts_num_persistent_areas;
+
 /* external.c */
 void erts_init_external(void);
 
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index bb22548587..869a575cb4 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -325,6 +325,7 @@ typedef long          Sint  erts_align_attribute(sizeof(long));
 #define UWORD_CONSTANT(Const) Const##UL
 #define ERTS_UWORD_MAX ULONG_MAX
 #define ERTS_SWORD_MAX LONG_MAX
+#define ERTS_SWORD_MIN LONG_MIN
 #define ERTS_SIZEOF_ETERM SIZEOF_LONG
 #define ErtsStrToSint strtol
 #elif SIZEOF_VOID_P == SIZEOF_INT
@@ -335,6 +336,7 @@ typedef int          Sint  erts_align_attribute(sizeof(int));
 #define UWORD_CONSTANT(Const) Const##U
 #define ERTS_UWORD_MAX UINT_MAX
 #define ERTS_SWORD_MAX INT_MAX
+#define ERTS_SWORD_MIN INT_MIN
 #define ERTS_SIZEOF_ETERM SIZEOF_INT
 #define ErtsStrToSint strtol
 #elif SIZEOF_VOID_P == SIZEOF_LONG_LONG
@@ -345,6 +347,7 @@ typedef long long          Sint  erts_align_attribute(sizeof(long long));
 #define UWORD_CONSTANT(Const) Const##ULL
 #define ERTS_UWORD_MAX ULLONG_MAX
 #define ERTS_SWORD_MAX LLONG_MAX
+#define ERTS_SWORD_MIN LLONG_MIN
 #define ERTS_SIZEOF_ETERM SIZEOF_LONG_LONG
 #if defined(__WIN32__)
 #define ErtsStrToSint _strtoi64
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index 08f8ca9788..d81bd89a48 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -2615,27 +2615,6 @@ not_equal:
 }
 
 
-/* 
- * Lexically compare two strings of bytes (string s1 length l1 and s2 l2).
- *
- *	s1 < s2	return -1
- *	s1 = s2	return  0
- *	s1 > s2 return +1
- */
-static int cmpbytes(byte *s1, int l1, byte *s2, int l2)
-{
-    int i;
-    i = 0;
-    while((i < l1) && (i < l2)) {
-	if (s1[i] < s2[i]) return(-1);
-	if (s1[i] > s2[i]) return(1);
-	i++;
-    }
-    if (l1 < l2) return(-1);
-    if (l1 > l2) return(1);
-    return(0);
-}
-
 
 /*
  * Compare objects.
@@ -2649,20 +2628,6 @@ static int cmpbytes(byte *s1, int l1, byte *s2, int l2)
  *
  */
 
-
-#define float_comp(x,y)    (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1))
-
-int erts_cmp_atoms(Eterm a, Eterm b)
-{
-    Atom *aa = atom_tab(atom_val(a));
-    Atom *bb = atom_tab(atom_val(b));
-    int diff = aa->ord0 - bb->ord0;
-    if (diff)
-	return diff;
-    return cmpbytes(aa->name+3, aa->len-3,
-		    bb->name+3, bb->len-3);
-}
-
 /* cmp(Eterm a, Eterm b)
  *  For compatibility with HiPE - arith-based compare.
  */
@@ -2673,22 +2638,6 @@ Sint cmp(Eterm a, Eterm b)
 
 Sint erts_cmp_compound(Eterm a, Eterm b, int exact, int eq_only);
 
-Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only)
-{
-    if (is_atom(a) && is_atom(b)) {
-        return erts_cmp_atoms(a, b);
-    } else if (is_both_small(a, b)) {
-        return (signed_val(a) - signed_val(b));
-    } else if (is_float(a) && is_float(b)) {
-        FloatDef af, bf;
-        GET_DOUBLE(a, af);
-        GET_DOUBLE(b, bf);
-        return float_comp(af.fd, bf.fd);
-    }
-    return erts_cmp_compound(a,b,exact,eq_only);
-}
-
-
 /* erts_cmp(Eterm a, Eterm b, int exact)
  * exact = 1 -> term-based compare
  * exact = 0 -> arith-based compare
@@ -2985,7 +2934,7 @@ tailrecur_ne:
 
 		    GET_DOUBLE(a, af);
 		    GET_DOUBLE(b, bf);
-		    ON_CMP_GOTO(float_comp(af.fd, bf.fd));
+		    ON_CMP_GOTO(erts_float_comp(af.fd, bf.fd));
 		}
 	    case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
 	    case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
@@ -3022,10 +2971,7 @@ tailrecur_ne:
 		    ErlFunThing* f2 = (ErlFunThing *) fun_val(b);
 		    Sint diff;
 
-		    diff = cmpbytes(atom_tab(atom_val(f1->fe->module))->name,
-				    atom_tab(atom_val(f1->fe->module))->len,
-				    atom_tab(atom_val(f2->fe->module))->name,
-				    atom_tab(atom_val(f2->fe->module))->len);
+                    diff = erts_cmp_atoms((f1->fe)->module, (f2->fe)->module);
 		    if (diff != 0) {
 			RETURN_NEQ(diff);
 		    }
@@ -3219,7 +3165,7 @@ tailrecur_ne:
 	    if (f2.fd < MAX_LOSSLESS_FLOAT && f2.fd > MIN_LOSSLESS_FLOAT) {
 		/* Float is within the no loss limit */
 		f1.fd = signed_val(aw);
-		j = float_comp(f1.fd, f2.fd);
+		j = erts_float_comp(f1.fd, f2.fd);
 	    }
 #if ERTS_SIZEOF_ETERM == 8
 	    else if (f2.fd > (double) (MAX_SMALL + 1)) {
@@ -3266,7 +3212,7 @@ tailrecur_ne:
 		if (big_to_double(aw, &f1.fd) < 0) {
 		    j = big_sign(aw) ? -1 : 1;
 		} else {
-		    j = float_comp(f1.fd, f2.fd);
+		    j = erts_float_comp(f1.fd, f2.fd);
 		}
 	    } else {
 		big = double_to_big(f2.fd, big_buf, sizeof(big_buf)/sizeof(Eterm));
@@ -3282,7 +3228,7 @@ tailrecur_ne:
 	    if (f1.fd < MAX_LOSSLESS_FLOAT && f1.fd > MIN_LOSSLESS_FLOAT) {
 		/* Float is within the no loss limit */
 		f2.fd = signed_val(bw);
-		j = float_comp(f1.fd, f2.fd);
+		j = erts_float_comp(f1.fd, f2.fd);
 	    }
 #if ERTS_SIZEOF_ETERM == 8
 	    else if (f1.fd > (double) (MAX_SMALL + 1)) {
diff --git a/erts/emulator/nifs/common/prim_file_nif.c b/erts/emulator/nifs/common/prim_file_nif.c
index fd6aaa61f6..ba36a33458 100644
--- a/erts/emulator/nifs/common/prim_file_nif.c
+++ b/erts/emulator/nifs/common/prim_file_nif.c
@@ -38,6 +38,9 @@ static void unload(ErlNifEnv *env, void* priv_data);
 
 static ErlNifResourceType *efile_resource_type;
 
+static ERL_NIF_TERM am_erts_prim_file;
+static ERL_NIF_TERM am_close;
+
 static ERL_NIF_TERM am_ok;
 static ERL_NIF_TERM am_error;
 static ERL_NIF_TERM am_continue;
@@ -96,11 +99,14 @@ static ERL_NIF_TERM set_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg
 
 static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
 
+static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+static ERL_NIF_TERM close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+
+/* Internal ops */
+static ERL_NIF_TERM delayed_close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
 static ERL_NIF_TERM get_handle_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
 static ERL_NIF_TERM altname_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
 
-static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
-
 /* All file handle operations are passed through a wrapper that handles state
  * transitions, marking it as busy during the course of the operation, and
  * closing on completion if the owner died in the middle of an operation.
@@ -128,7 +134,11 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]
  *
  * CLOSE_PENDING ->
  *      CLOSED (file_handle_wrapper)
- */
+ *
+ * Should the owner of a file die, we can't close it immediately as that could
+ * potentially block a normal scheduler. When entering the CLOSED state from
+ * owner_death_callback, we will instead send a message to the erts_prim_file
+ * process that will then close the file through delayed_close_nif. */
 
 typedef ERL_NIF_TERM (*file_op_impl_t)(efile_data_t *d, ErlNifEnv *env,
     int argc, const ERL_NIF_TERM argv[]);
@@ -142,7 +152,6 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env
         return file_handle_wrapper( name ## _impl , env, argc, argv); \
     }
 
-WRAP_FILE_HANDLE_EXPORT(close_nif)
 WRAP_FILE_HANDLE_EXPORT(read_nif)
 WRAP_FILE_HANDLE_EXPORT(write_nif)
 WRAP_FILE_HANDLE_EXPORT(pread_nif)
@@ -193,18 +202,27 @@ static ErlNifFunc nif_funcs[] = {
 
     /* Internal ops. */
     {"get_handle_nif", 1, get_handle_nif},
+    {"delayed_close_nif", 1, delayed_close_nif, ERL_NIF_DIRTY_JOB_IO_BOUND},
     {"altname_nif", 1, altname_nif, ERL_NIF_DIRTY_JOB_IO_BOUND},
 };
 
 ERL_NIF_INIT(prim_file, nif_funcs, load, NULL, upgrade, unload)
 
+static ErlNifPid erts_prim_file_pid;
+
 static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon);
-static void gc_callback(ErlNifEnv *env, void* data);
 
-static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info)
+static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM prim_file_pid)
 {
     ErlNifResourceTypeInit callbacks;
 
+    if(!enif_get_local_pid(env, prim_file_pid, &erts_prim_file_pid)) {
+        ASSERT(!"bad pid passed to prim_file_nif");
+    }
+
+    am_erts_prim_file = enif_make_atom(env, "erts_prim_file");
+    am_close = enif_make_atom(env, "close");
+
     am_ok = enif_make_atom(env, "ok");
     am_error = enif_make_atom(env, "error");
     am_continue = enif_make_atom(env, "continue");
@@ -239,7 +257,7 @@ static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info)
     am_eof = enif_make_atom(env, "eof");
 
     callbacks.down = owner_death_callback;
-    callbacks.dtor = gc_callback;
+    callbacks.dtor = NULL;
     callbacks.stop = NULL;
 
     efile_resource_type = enif_open_resource_type_x(env, "efile", &callbacks,
@@ -305,8 +323,10 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env
             /* This is the only point where a change from CLOSE_PENDING is
              * possible, and we're running synchronously, so we can't race with
              * anything else here. */
+            posix_errno_t ignored;
+
             erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED);
-            efile_close(d);
+            efile_close(d, &ignored);
         }
     } else {
         /* CLOSE_PENDING should be impossible at this point since it requires
@@ -319,6 +339,24 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env
     return result;
 }
 
+/* This is a special close operation used by the erts_prim_file process for
+ * cleaning up orphaned files. It differs from the ordinary close_nif in that
+ * it only works for files that have already entered the CLOSED state. */
+static ERL_NIF_TERM delayed_close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+    posix_errno_t ignored;
+    efile_data_t *d;
+
+    ASSERT(argc == 1);
+    if(!get_file_data(env, argv[0], &d)) {
+        return enif_make_badarg(env);
+    }
+
+    ASSERT(erts_atomic32_read_acqb(&d->state) == EFILE_STATE_CLOSED);
+    efile_close(d, &ignored);
+
+    return am_ok;
+}
+
 static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon) {
     efile_data_t *d = (efile_data_t*)obj;
 
@@ -334,8 +372,24 @@ static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlN
 
         switch(previous_state) {
         case EFILE_STATE_IDLE:
-            efile_close(d);
-            return;
+            {
+                /* We cannot close the file here as that could block a normal
+                 * scheduler, so we tell erts_prim_file to do it for us.
+                 *
+                 * This can in turn become a bottleneck (especially in cases
+                 * like NFS failure), but it's less problematic than blocking
+                 * thread progress. */
+                ERL_NIF_TERM message, file_ref;
+
+                file_ref = enif_make_resource(env, d);
+                message = enif_make_tuple2(env, am_close, file_ref);
+
+                if(!enif_send(env, &erts_prim_file_pid, NULL, message)) {
+                    ERTS_INTERNAL_ERROR("Failed to defer prim_file close.");
+                }
+
+                return;
+            }
         case EFILE_STATE_CLOSE_PENDING:
         case EFILE_STATE_CLOSED:
             /* We're either already closed or managed to mark ourselves for
@@ -352,24 +406,6 @@ static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlN
     }
 }
 
-static void gc_callback(ErlNifEnv *env, void* data) {
-    efile_data_t *d = (efile_data_t*)data;
-
-    enum efile_state_t previous_state;
-
-    (void)env;
-
-    previous_state = erts_atomic32_cmpxchg_acqb(&d->state,
-        EFILE_STATE_CLOSED, EFILE_STATE_IDLE);
-
-    ASSERT(previous_state != EFILE_STATE_CLOSE_PENDING &&
-        previous_state != EFILE_STATE_BUSY);
-
-    if(previous_state == EFILE_STATE_IDLE) {
-        efile_close(d);
-    }
-}
-
 static ERL_NIF_TERM efile_filetype_to_atom(enum efile_filetype_t type) {
     switch(type) {
         case EFILE_FILETYPE_DEVICE: return am_device;
@@ -454,40 +490,62 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]
         return posix_error_to_tuple(env, posix_errno);
     }
 
-    result = enif_make_resource(env, d);
-    enif_release_resource(d);
-
     enif_self(env, &controlling_process);
 
     if(enif_monitor_process(env, d, &controlling_process, &d->monitor)) {
+        /* We need to close the file manually as we haven't registered a
+         * destructor. */
+        posix_errno_t ignored;
+
+        erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED);
+        efile_close(d, &ignored);
+
         return posix_error_to_tuple(env, EINVAL);
     }
 
+    /* Note that we do not call enif_release_resource at this point. While it's
+     * normally safe to leave resource management to the GC, efile_close is a
+     * blocking operation which must not be done in the GC callback, and we
+     * can't defer it as the resource is gone as soon as it returns.
+     *
+     * We instead keep the resource alive until efile_close is called, after
+     * which it's safe to leave things to the GC. If the controlling process
+     * were to die before the user had a chance to close their file, the above
+     * monitor will tell the erts_prim_file process to close it for them. */
+    result = enif_make_resource(env, d);
+
     return enif_make_tuple2(env, am_ok, result);
 }
 
-static ERL_NIF_TERM close_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+static ERL_NIF_TERM close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
     enum efile_state_t previous_state;
+    efile_data_t *d;
 
-    if(argc != 0) {
+    ASSERT(argc == 1);
+    if(!get_file_data(env, argv[0], &d)) {
         return enif_make_badarg(env);
     }
 
     previous_state = erts_atomic32_cmpxchg_acqb(&d->state,
-        EFILE_STATE_CLOSED, EFILE_STATE_BUSY);
+        EFILE_STATE_CLOSED, EFILE_STATE_IDLE);
 
-    ASSERT(previous_state == EFILE_STATE_CLOSE_PENDING ||
-        previous_state == EFILE_STATE_BUSY);
+    if(previous_state == EFILE_STATE_IDLE) {
+        posix_errno_t error;
 
-    if(previous_state == EFILE_STATE_BUSY) {
         enif_demonitor_process(env, d, &d->monitor);
 
-        if(!efile_close(d)) {
-            return posix_error_to_tuple(env, d->posix_errno);
+        if(!efile_close(d, &error)) {
+            return posix_error_to_tuple(env, error);
         }
-    }
 
-    return am_ok;
+        return am_ok;
+    } else {
+        /* CLOSE_PENDING should be impossible at this point since it requires
+         * a transition from BUSY; the only valid state here is CLOSED. */
+        ASSERT(previous_state == EFILE_STATE_CLOSED);
+
+        return posix_error_to_tuple(env, EINVAL);
+    }
 }
 
 static ERL_NIF_TERM read_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
@@ -1190,7 +1248,7 @@ static posix_errno_t read_file(efile_data_t *d, size_t size, ErlNifBinary *resul
 }
 
 static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
-    posix_errno_t posix_errno;
+    posix_errno_t posix_errno, ignored;
 
     efile_fileinfo_t info = {0};
     efile_path_t path;
@@ -1211,7 +1269,9 @@ static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a
     }
 
     posix_errno = read_file(d, info.size, &result);
-    enif_release_resource(d);
+
+    erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED);
+    efile_close(d, &ignored);
 
     if(posix_errno) {
         return posix_error_to_tuple(env, posix_errno);
diff --git a/erts/emulator/nifs/common/prim_file_nif.h b/erts/emulator/nifs/common/prim_file_nif.h
index 099c06c48c..b2e30c59dd 100644
--- a/erts/emulator/nifs/common/prim_file_nif.h
+++ b/erts/emulator/nifs/common/prim_file_nif.h
@@ -159,8 +159,11 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
         ErlNifResourceType *nif_type, efile_data_t **d);
 
 /** @brief Closes a file. The file must have entered the CLOSED state prior to
- * calling this to prevent double close. */
-int efile_close(efile_data_t *d);
+ * calling this to prevent double close.
+ *
+ * Note that the file is completely invalid after this point, so the error code
+ * is provided in \c error rather than d->posix_errno. */
+int efile_close(efile_data_t *d, posix_errno_t *error);
 
 /* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
 
diff --git a/erts/emulator/nifs/unix/unix_prim_file.c b/erts/emulator/nifs/unix/unix_prim_file.c
index dea73db18a..169b193993 100644
--- a/erts/emulator/nifs/unix/unix_prim_file.c
+++ b/erts/emulator/nifs/unix/unix_prim_file.c
@@ -202,21 +202,24 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
     return errno;
 }
 
-int efile_close(efile_data_t *d) {
+int efile_close(efile_data_t *d, posix_errno_t *error) {
     efile_unix_t *u = (efile_unix_t*)d;
     int fd;
 
+    ASSERT(enif_thread_type() == ERL_NIF_THR_DIRTY_IO_SCHEDULER);
     ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED);
     ASSERT(u->fd != -1);
 
     fd = u->fd;
     u->fd = -1;
 
+    enif_release_resource(d);
+
     /* close(2) either always closes (*BSD, Linux) or leaves the fd in an
      * undefined state (POSIX 2008, Solaris), so we must not retry on EINTR. */
 
     if(close(fd) < 0) {
-        u->common.posix_errno = errno;
+        *error = errno;
         return 0;
     }
 
diff --git a/erts/emulator/nifs/win32/win_prim_file.c b/erts/emulator/nifs/win32/win_prim_file.c
index 602a282dd1..d0aa70542f 100644
--- a/erts/emulator/nifs/win32/win_prim_file.c
+++ b/erts/emulator/nifs/win32/win_prim_file.c
@@ -466,18 +466,21 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
     }
 }
 
-int efile_close(efile_data_t *d) {
+int efile_close(efile_data_t *d, posix_errno_t *error) {
     efile_win_t *w = (efile_win_t*)d;
     HANDLE handle;
 
+    ASSERT(enif_thread_type() == ERL_NIF_THR_DIRTY_IO_SCHEDULER);
     ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED);
     ASSERT(w->handle != INVALID_HANDLE_VALUE);
 
     handle = w->handle;
     w->handle = INVALID_HANDLE_VALUE;
 
+    enif_release_resource(d);
+
     if(!CloseHandle(handle)) {
-        w->common.posix_errno = windows_to_posix_errno(GetLastError());
+        *error = windows_to_posix_errno(GetLastError());
         return 0;
     }
 
diff --git a/erts/emulator/sys/unix/sys_uds.c b/erts/emulator/sys/unix/sys_uds.c
index 39a4866065..c9f73622ba 100644
--- a/erts/emulator/sys/unix/sys_uds.c
+++ b/erts/emulator/sys/unix/sys_uds.c
@@ -88,8 +88,9 @@ sys_uds_readv(int fd, struct iovec *iov, size_t iov_len,
     if((msg.msg_flags & MSG_CTRUNC) == MSG_CTRUNC)
     {
         /* We assume that we have given enough space for any header
-           that are sent to us. So the only remaining reason to get
-           this flag set is if the caller has run out of file descriptors.
+           that are sent to us. So the only remaining reasons to get
+           this flag set is if the caller has run out of file descriptors
+           or an SELinux policy prunes the response (eg. O_APPEND on STDERR).
         */
         errno = EMFILE;
         return -1;
diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile
index bf00de2204..6a064ec8d4 100644
--- a/erts/emulator/test/Makefile
+++ b/erts/emulator/test/Makefile
@@ -33,6 +33,7 @@ MODULES= \
 	after_SUITE \
 	alloc_SUITE \
 	async_ports_SUITE \
+	atomics_SUITE \
 	beam_SUITE \
 	beam_literals_SUITE \
 	bif_SUITE \
@@ -50,6 +51,7 @@ MODULES= \
 	call_trace_SUITE \
 	code_SUITE \
 	code_parallel_load_SUITE \
+	counters_SUITE \
 	crypto_SUITE \
 	ddll_SUITE \
 	decode_packet_SUITE \
@@ -92,6 +94,7 @@ MODULES= \
 	port_SUITE \
 	port_bif_SUITE \
 	prim_eval_SUITE \
+	persistent_term_SUITE \
 	process_SUITE \
 	pseudoknot_SUITE \
 	receive_SUITE \
diff --git a/erts/emulator/test/atomics_SUITE.erl b/erts/emulator/test/atomics_SUITE.erl
new file mode 100644
index 0000000000..8c42354770
--- /dev/null
+++ b/erts/emulator/test/atomics_SUITE.erl
@@ -0,0 +1,147 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(atomics_SUITE).
+
+-include_lib("common_test/include/ct.hrl").
+
+-compile(export_all).
+
+suite() -> [{ct_hooks,[ts_install_cth]}].
+
+all() ->
+    [signed, unsigned, bad, signed_limits, unsigned_limits].
+
+signed(Config) when is_list(Config) ->
+    Size = 10,
+    Ref = atomics:new(Size,[]),
+    #{size:=Size, memory:=Memory} = atomics:info(Ref),
+    {_,true} = {Memory, Memory > Size*8},
+    {_,true} = {Memory, Memory < Size*max_atomic_sz() + 100},
+    [signed_do(Ref, Ix) || Ix <- lists:seq(1, Size)],
+    ok.
+
+signed_do(Ref, Ix) ->
+    0 = atomics:get(Ref, Ix),
+    ok = atomics:put(Ref, Ix, 3),
+    ok = atomics:add(Ref, Ix, 14),
+    17 = atomics:get(Ref, Ix),
+    20 = atomics:add_get(Ref, Ix, 3),
+    -3 = atomics:add_get(Ref, Ix, -23),
+    17 = atomics:add_get(Ref, Ix, 20),
+    ok = atomics:sub(Ref, Ix, 4),
+    13 = atomics:get(Ref, Ix),
+    -7 = atomics:sub_get(Ref, Ix, 20),
+    3  = atomics:sub_get(Ref, Ix, -10),
+    3  = atomics:exchange(Ref, Ix, 666),
+    ok = atomics:compare_exchange(Ref, Ix, 666, 777),
+    777 = atomics:compare_exchange(Ref, Ix, 666, -666),
+    ok.
+
+unsigned(Config) when is_list(Config) ->
+    Size = 10,
+    Ref = atomics:new(Size,[{signed, false}]),
+    #{size:=Size, memory:=Memory} = atomics:info(Ref),
+    true = Memory > Size*8,
+    true = Memory < Size*max_atomic_sz() + 100,
+    [unsigned_do(Ref, Ix) || Ix <- lists:seq(1, Size)],
+    ok.
+
+unsigned_do(Ref, Ix) ->
+    0 = atomics:get(Ref, Ix),
+    ok = atomics:put(Ref, Ix, 3),
+    ok = atomics:add(Ref, Ix, 14),
+    17 = atomics:get(Ref, Ix),
+    20 = atomics:add_get(Ref, Ix, 3),
+    ok = atomics:sub(Ref, Ix, 7),
+    13 = atomics:get(Ref, Ix),
+    3  = atomics:sub_get(Ref, Ix, 10),
+    3  = atomics:exchange(Ref, Ix, 666),
+    ok = atomics:compare_exchange(Ref, Ix, 666, 777),
+    777 = atomics:compare_exchange(Ref, Ix, 666, 888),
+    ok.
+
+bad(Config) when is_list(Config) ->
+    {'EXIT',{badarg,_}} = (catch atomics:new(0,[])),
+    {'EXIT',{badarg,_}} = (catch atomics:new(10,[bad])),
+    {'EXIT',{badarg,_}} = (catch atomics:new(10,[{signed,bad}])),
+    {'EXIT',{badarg,_}} = (catch atomics:new(10,[{signed,true}, bad])),
+    {'EXIT',{badarg,_}} = (catch atomics:new(10,[{signed,false} | bad])),
+    Ref = atomics:new(10,[]),
+    {'EXIT',{badarg,_}} = (catch atomics:get(1742, 7)),
+    {'EXIT',{badarg,_}} = (catch atomics:get(make_ref(), 7)),
+    {'EXIT',{badarg,_}} = (catch atomics:get(Ref, -1)),
+    {'EXIT',{badarg,_}} = (catch atomics:get(Ref, 0)),
+    {'EXIT',{badarg,_}} = (catch atomics:get(Ref, 11)),
+    {'EXIT',{badarg,_}} = (catch atomics:get(Ref, 7.0)),
+    ok.
+
+
+signed_limits(Config) when is_list(Config) ->
+    Bits = 64,
+    Max = (1 bsl (Bits-1)) - 1,
+    Min = -(1 bsl (Bits-1)),
+
+    Ref = atomics:new(1,[{signed, true}]),
+    #{max:=Max, min:=Min} = atomics:info(Ref),
+    0 = atomics:get(Ref, 1),
+    ok = atomics:add(Ref, 1, Max),
+    Min = atomics:add_get(Ref, 1, 1),
+    Max = atomics:sub_get(Ref, 1, 1),
+
+    IncrMax = (Max bsl 1) bor 1,
+    ok = atomics:put(Ref, 1, 0),
+    ok = atomics:add(Ref, 1, IncrMax),
+    -1 = atomics:get(Ref, 1),
+    {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, IncrMax+1)),
+    {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, Min-1)),
+
+    ok.
+
+unsigned_limits(Config) when is_list(Config) ->
+    Bits = 64,
+    Max = (1 bsl Bits) - 1,
+    Min = 0,
+
+    Ref = atomics:new(1,[{signed,false}]),
+    #{max:=Max, min:=Min} = atomics:info(Ref),
+    0 = atomics:get(Ref, 1),
+    ok = atomics:add(Ref, 1, Max),
+    Min = atomics:add_get(Ref, 1, 1),
+    Max = atomics:sub_get(Ref, 1, 1),
+
+    {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, Max+1)),
+    IncrMin = -(1 bsl (Bits-1)),
+    ok = atomics:put(Ref, 1, -IncrMin),
+    ok = atomics:add(Ref, 1, IncrMin),
+    0 = atomics:get(Ref, 1),
+    {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, IncrMin-1)),
+
+    ok.
+
+max_atomic_sz() ->
+    case erlang:system_info({wordsize, external}) of
+        4 -> 16;
+        8 ->
+            EI = erlang:system_info(ethread_info),
+            case lists:keyfind("64-bit native atomics", 1, EI) of
+                {_, "no", _} -> 16;
+                _ -> 8
+            end
+    end.
diff --git a/erts/emulator/test/counters_SUITE.erl b/erts/emulator/test/counters_SUITE.erl
new file mode 100644
index 0000000000..b3f0358c1e
--- /dev/null
+++ b/erts/emulator/test/counters_SUITE.erl
@@ -0,0 +1,234 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(counters_SUITE).
+
+-include_lib("common_test/include/ct.hrl").
+
+-export([suite/0, all/0]).
+-export([basic/1, bad/1, limits/1, indep/1, write_concurrency/1]).
+
+suite() -> [{ct_hooks,[ts_install_cth]}].
+
+all() ->
+    [basic, bad, limits, indep, write_concurrency].
+
+basic(Config) when is_list(Config) ->
+    Size = 10,
+    [begin
+         Ref = counters:new(Size,[Type]),
+         #{size:=Size, memory:=Memory} = counters:info(Ref),
+         check_memory(Type, Memory, Size),
+         [basic_do(Ref, Ix) || Ix <- lists:seq(1, Size)]
+     end
+     || Type <- [atomics, write_concurrency]],
+    ok.
+
+basic_do(Ref, Ix) ->
+    0 = counters:get(Ref, Ix),
+    ok = counters:add(Ref, Ix, 3),
+    3  = counters:get(Ref, Ix),
+    ok = counters:add(Ref, Ix, 14),
+    17  = counters:get(Ref, Ix),
+    ok = counters:add(Ref, Ix, -20),
+    -3  = counters:get(Ref, Ix),
+    ok = counters:add(Ref, Ix, 100),
+    97 = counters:get(Ref, Ix),
+    ok = counters:sub(Ref, Ix, 20),
+    77 = counters:get(Ref, Ix),
+    ok = counters:sub(Ref, Ix, -10),
+    87 = counters:get(Ref, Ix),
+    ok = counters:put(Ref, Ix, 0),
+    0 = counters:get(Ref, Ix),
+    ok = counters:put(Ref, Ix, 123),
+    123 = counters:get(Ref, Ix),
+    ok = counters:put(Ref, Ix, -321),
+    -321 = counters:get(Ref, Ix),
+    ok.
+
+check_memory(atomics, Memory, Size) ->
+    {_,true} = {Memory, Memory > Size*8},
+    {_,true} = {Memory, Memory < Size*max_atomic_sz() + 100};
+check_memory(write_concurrency, Memory, Size) ->
+    NWords = erlang:system_info(schedulers) + 1,
+    {_,true} = {Memory, Memory > NWords*Size*8},
+    {_,true} = {Memory, Memory < NWords*(Size+7)*max_atomic_sz() + 100}.
+
+max_atomic_sz() ->
+    case erlang:system_info({wordsize, external}) of
+        4 -> 16;
+        8 ->
+            EI = erlang:system_info(ethread_info),
+            case lists:keyfind("64-bit native atomics", 1, EI) of
+                {_, "no", _} -> 16;
+                _ -> 8
+            end
+    end.
+
+bad(Config) when is_list(Config) ->
+    {'EXIT',{badarg,_}} = (catch counters:new(0,[])),
+    {'EXIT',{badarg,_}} = (catch counters:new(10,[bad])),
+    {'EXIT',{badarg,_}} = (catch counters:new(10,[atomic, bad])),
+    {'EXIT',{badarg,_}} = (catch counters:new(10,[write_concurrency | bad])),
+    Ref = counters:new(10,[]),
+    {'EXIT',{badarg,_}} = (catch counters:get(1742, 7)),
+    {'EXIT',{badarg,_}} = (catch counters:get(make_ref(), 7)),
+    {'EXIT',{badarg,_}} = (catch counters:get(Ref, -1)),
+    {'EXIT',{badarg,_}} = (catch counters:get(Ref, 0)),
+    {'EXIT',{badarg,_}} = (catch counters:get(Ref, 11)),
+    {'EXIT',{badarg,_}} = (catch counters:get(Ref, 7.0)),
+    ok.
+
+
+limits(Config) when is_list(Config) ->
+    limits_do(counters:new(1,[atomics])),
+    limits_do(counters:new(1,[write_concurrency])),
+    ok.
+
+limits_do(Ref) ->
+    Bits = 64,
+    Max = (1 bsl (Bits-1)) - 1,
+    Min = -(1 bsl (Bits-1)),
+
+    0 = counters:get(Ref, 1),
+    ok = counters:put(Ref, 1, Max),
+    Max = counters:get(Ref, 1),
+    ok = counters:add(Ref, 1, 1),
+    Min = counters:get(Ref, 1),
+    ok  = counters:sub(Ref, 1, 1),
+    Max = counters:get(Ref, 1),
+    ok = counters:put(Ref, 1, Min),
+    Min = counters:get(Ref, 1),
+
+    IncrMax = (Max bsl 1) bor 1,
+    ok = counters:put(Ref, 1, 0),
+    ok = counters:add(Ref, 1, IncrMax),
+    -1 = counters:get(Ref, 1),
+    {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, IncrMax+1)),
+    {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, Min-1)),
+    {'EXIT',{badarg,_}} = (catch counters:put(Ref, 1, Max+1)),
+    {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, Min-1)),
+    ok.
+
+
+%% Verify that independent workers, using different counters
+%% within the same array, do not interfere with each other.
+indep(Config) when is_list(Config) ->
+    NScheds = erlang:system_info(schedulers),
+    Ref = counters:new(NScheds,[write_concurrency]),
+    Rounds = 100,
+    Papa = self(),
+    Pids = [spawn_opt(fun () ->
+                               Val = I*197,
+                               counters:put(Ref, I, Val),
+                               indep_looper(Rounds, Ref, I, Val),
+                               Papa ! {self(), done}
+                       end,
+                      [link, {scheduler, I}])
+            || I <- lists:seq(1, NScheds)],
+    [receive {P,done} -> ok end || P <- Pids],
+    ok.
+
+indep_looper(0, _, _ , _) ->
+    ok;
+indep_looper(N, Ref, I, Val0) ->
+    %%io:format("Val0 = ~p\n", [Val0]),
+    Val0 = counters:get(Ref, I),
+    Val1 = indep_adder(Ref, I, Val0),
+    indep_subber(Ref, I, Val1),
+    Val2 = N*7 + I,
+    counters:put(Ref, I, Val2),
+    indep_looper(N-1, Ref, I, Val2).
+
+indep_adder(Ref, I, Val) when Val < (1 bsl 62) ->
+    %%io:format("adder Val = ~p\n", [Val]),
+    Incr = abs(Val div 2) + I + 984735,
+    counters:add(Ref, I, Incr),
+    Res = Val + Incr,
+    Res = counters:get(Ref, I),
+    indep_adder(Ref, I, Res);
+indep_adder(_Ref, _I, Val) ->
+    Val.
+
+indep_subber(Ref, I, Val) when Val > -(1 bsl 62) ->
+    %%io:format("subber Val = ~p\n", [Val]),
+    Decr = (abs(Val div 2) + I + 725634),
+    counters:sub(Ref, I, Decr),
+    Res = Val - Decr,
+    Res = counters:get(Ref, I),
+    indep_subber(Ref, I, Res);
+indep_subber(_Ref, _I, Val) ->
+    Val.
+
+
+
+%% Verify write_concurrency yields correct results.
+write_concurrency(Config) when is_list(Config) ->
+    rand:seed(exs1024s),
+    io:format("*** SEED: ~p ***\n", [rand:export_seed()]),
+    NScheds = erlang:system_info(schedulers),
+    Size = 100,
+    Ref = counters:new(Size,[write_concurrency]),
+    Rounds = 1000,
+    Papa = self(),
+    Pids = [spawn_opt(fun Worker() ->
+                              receive
+                                  {go, Ix, Incr} ->
+                                      wc_looper(Rounds, Ref, Ix, Incr),
+                                      Papa ! {self(), done, Rounds*Incr},
+                                      Worker();
+                                  stop ->
+                                      ok
+                              end
+                       end,
+                      [link, {scheduler, N}])
+            || N <- lists:seq(1, NScheds)],
+    [begin
+         Base = rand_log64(),
+         counters:put(Ref, Index, Base),
+         SendList = [{P,{go, Index, rand_log64()}} || P <- Pids],
+         [P ! Msg || {P,Msg} <- SendList],
+         Added = lists:sum([receive {P,done,Contrib} -> Contrib end || P <- Pids]),
+         Result = mask_sint64(Base+Added),
+         {_,Result} = {Result, counters:get(Ref, Index)}
+     end
+     || Index <- lists:seq(1, Size)],
+
+    [begin unlink(P), P ! stop end || P <- Pids],
+    ok.
+
+wc_looper(0, _, _, _) ->
+    ok;
+wc_looper(N, Ref, Ix, Incr) ->
+    counters:add(Ref, Ix, Incr),
+    wc_looper(N-1, Ref, Ix, Incr).
+
+mask_sint64(X) ->
+    SMask = 1 bsl 63,
+    UMask = SMask - 1,
+    (X band UMask) - (X band SMask).
+
+%% A random signed 64-bit integer
+%% with a uniformly distributed number of significant bits.
+rand_log64() ->
+    Uint = round(math:pow(2, rand:uniform()*63)),
+    case rand:uniform(2) of
+        1 -> -Uint;
+        2 -> Uint
+    end.
diff --git a/erts/emulator/test/persistent_term_SUITE.erl b/erts/emulator/test/persistent_term_SUITE.erl
new file mode 100644
index 0000000000..58cd3276b0
--- /dev/null
+++ b/erts/emulator/test/persistent_term_SUITE.erl
@@ -0,0 +1,614 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%5
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(persistent_term_SUITE).
+-include_lib("common_test/include/ct.hrl").
+
+-export([all/0,suite/0,
+	 basic/1,purging/1,sharing/1,get_trapping/1,
+         info/1,info_trapping/1,killed_while_trapping/1,
+         off_heap_values/1,keys/1,collisions/1,
+         init_restart/1]).
+
+%%
+-export([test_init_restart_cmd/1]).
+
+suite() ->
+    [{ct_hooks,[ts_install_cth]},
+     {timetrap,{minutes,10}}].
+
+all() ->
+    [basic,purging,sharing,get_trapping,info,info_trapping,
+     killed_while_trapping,off_heap_values,keys,collisions,
+     init_restart].
+
+basic(_Config) ->
+    Chk = chk(),
+    N = 777,
+    Seq = lists:seq(1, N),
+    par(2, N, Seq),
+    seq(3, Seq),
+    seq(3, Seq),                                %Same values.
+    _ = [begin
+             Key = {?MODULE,{key,I}},
+             true = persistent_term:erase(Key),
+             false = persistent_term:erase(Key),
+             {'EXIT',{badarg,_}} = (catch persistent_term:get(Key))
+         end || I <- Seq],
+    [] = [P || {{?MODULE,_},_}=P <- persistent_term:get()],
+    chk(Chk).
+
+par(C, N, Seq) ->
+    _ = [spawn_link(fun() ->
+                            ok = persistent_term:put({?MODULE,{key,I}},
+                                                     {value,C*I})
+                    end) || I <- Seq],
+    Result = wait(N),
+    _ = [begin
+             Double = C*I,
+             {{?MODULE,{key,I}},{value,Double}} = Res
+         end || {I,Res} <- lists:zip(Seq, Result)],
+    ok.
+
+seq(C, Seq) ->
+    _ = [ok = persistent_term:put({?MODULE,{key,I}}, {value,C*I}) ||
+            I <- Seq],
+    All = persistent_term:get(),
+    All = [P || {{?MODULE,_},_}=P <- persistent_term:get()],
+    All = [{Key,persistent_term:get(Key)} || {Key,_} <- All],
+    Result = lists:sort(All),
+    _ = [begin
+             Double = C*I,
+             {{?MODULE,{key,I}},{value,Double}} = Res
+         end || {I,Res} <- lists:zip(Seq, Result)],
+    ok.
+
+wait(N) ->
+    All = [P || {{?MODULE,_},_}=P <- persistent_term:get()],
+    case length(All) of
+        N ->
+            All = [{Key,persistent_term:get(Key)} || {Key,_} <- All],
+            lists:sort(All);
+        _ ->
+            receive after 10 -> ok end,
+            wait(N)
+    end.
+
+%% Make sure that terms that have been erased are copied into all
+%% processes that still hold a pointer to them.
+
+purging(_Config) ->
+    Chk = chk(),
+    do_purging(fun(K) -> persistent_term:put(K, {?MODULE,new}) end,
+               replaced),
+    do_purging(fun persistent_term:erase/1, erased),
+    chk(Chk).
+
+do_purging(Eraser, Type) ->
+    Parent = self(),
+    Key = {?MODULE,?FUNCTION_NAME},
+    ok = persistent_term:put(Key, {term,[<<"abc",0:777/unit:8>>]}),
+    Ps0 = [spawn_monitor(fun() -> purging_tester(Parent, Key) end) ||
+              _ <- lists:seq(1, 50)],
+    Ps = maps:from_list(Ps0),
+    purging_recv(gotten, Ps),
+    Eraser(Key),
+    _ = [P ! {Parent,Type} || P <- maps:keys(Ps)],
+    purging_wait(Ps).
+
+purging_recv(Tag, Ps) when map_size(Ps) > 0 ->
+    receive
+        {Pid,Tag} ->
+            true = is_map_key(Pid, Ps),
+            purging_recv(Tag, maps:remove(Pid, Ps))
+    end;
+purging_recv(_, _) -> ok.
+
+purging_wait(Ps) when map_size(Ps) > 0 ->
+    receive
+        {'DOWN',Ref,process,Pid,Reason} ->
+            normal = Reason,
+            Ref = map_get(Pid, Ps),
+            purging_wait(maps:remove(Pid, Ps))
+    end;
+purging_wait(_) -> ok.
+
+purging_tester(Parent, Key) ->
+    Term = persistent_term:get(Key),
+    purging_check_term(Term),
+    0 = erts_debug:size_shared(Term),
+    Parent ! {self(),gotten},
+    receive
+        {Parent,erased} ->
+            {'EXIT',{badarg,_}} = (catch persistent_term:get(Key)),
+            purging_tester_1(Term);
+        {Parent,replaced} ->
+            {?MODULE,new} = persistent_term:get(Key),
+            purging_tester_1(Term)
+    end.
+
+%% Wait for the term to be copied into this process.
+purging_tester_1(Term) ->
+    purging_check_term(Term),
+    receive after 1 -> ok end,
+    case erts_debug:size_shared(Term) of
+        0 ->
+            purging_tester_1(Term);
+        Size ->
+            %% The term has been copied into this process.
+            purging_check_term(Term),
+            Size = erts_debug:size(Term)
+    end.
+
+purging_check_term({term,[<<"abc",0:777/unit:8>>]}) ->
+    ok.
+
+%% Test that sharing is preserved when storing terms.
+
+sharing(_Config) ->
+    Chk = chk(),
+    Depth = 10,
+    Size = 2*Depth,
+    Shared = lists:foldl(fun(_, A) -> [A|A] end,
+                         [], lists:seq(1, Depth)),
+    Size = erts_debug:size(Shared),
+    Key = {?MODULE,?FUNCTION_NAME},
+    ok = persistent_term:put(Key, Shared),
+    SharedStored = persistent_term:get(Key),
+    Size = erts_debug:size(SharedStored),
+    0 = erts_debug:size_shared(SharedStored),
+
+    {Pid,Ref} = spawn_monitor(fun() ->
+                                      Term = persistent_term:get(Key),
+                                      Size = erts_debug:size(Term),
+                                      0 = erts_debug:size_shared(Term),
+                                      true = Term =:= SharedStored
+                              end),
+    receive
+        {'DOWN',Ref,process,Pid,normal} ->
+            true = persistent_term:erase(Key),
+            Size = erts_debug:size(SharedStored),
+            chk(Chk)
+    end.
+
+%% Test trapping of persistent_term:get/0.
+
+get_trapping(_Config) ->
+    Chk = chk(),
+
+    %% Assume that the get/0 traps after 4000 iterations
+    %% in a non-debug emulator.
+    N = case test_server:timetrap_scale_factor() of
+            1 -> 10000;
+            _ -> 1000
+        end,
+    spawn_link(fun() -> get_trapping_create(N) end),
+    All = do_get_trapping(N, []),
+    N = get_trapping_check_result(lists:sort(All), 1),
+    erlang:garbage_collect(),
+    get_trapping_erase(N),
+    chk(Chk).
+
+do_get_trapping(N, Prev) ->
+    case persistent_term:get() of
+        Prev when length(Prev) >= N ->
+            All = [P || {{?MODULE,{get_trapping,_}},_}=P <- Prev],
+            case length(All) of
+                N -> All;
+                _ -> do_get_trapping(N, Prev)
+            end;
+        New ->
+            receive after 1 -> ok end,
+            do_get_trapping(N, New)
+    end.
+
+get_trapping_create(0) ->
+    ok;
+get_trapping_create(N) ->
+    ok = persistent_term:put({?MODULE,{get_trapping,N}}, N),
+    get_trapping_create(N-1).
+
+get_trapping_check_result([{{?MODULE,{get_trapping,N}},N}|T], N) ->
+    get_trapping_check_result(T, N+1);
+get_trapping_check_result([], N) -> N-1.
+
+get_trapping_erase(0) ->
+    ok;
+get_trapping_erase(N) ->
+    true = persistent_term:erase({?MODULE,{get_trapping,N}}),
+    get_trapping_erase(N-1).
+
+%% Test retrieving information about persistent terms.
+
+info(_Config) ->
+    Chk = chk(),
+
+    %% White box test of info/0.
+    N = 100,
+    try
+        Overhead = info_literal_area_overhead(),
+        io:format("Overhead = ~p\n", [Overhead]),
+        info_wb(N, Overhead, info_info())
+    after
+        _ = [_ = persistent_term:erase({?MODULE,I}) ||
+                I <- lists:seq(1, N)]
+    end,
+
+    chk(Chk).
+
+%% White box test of persistent_term:info/0. We take into account
+%% that there might already exist persistent terms (created by the
+%% OTP standard libraries), but we assume that they are not
+%% changed during the execution of this test case.
+
+info_wb(0, _, _) ->
+    ok;
+info_wb(N, Overhead, {BaseCount,BaseMemory}) ->
+    Key = {?MODULE,N},
+    Value = lists:seq(1, N),
+    ok = persistent_term:put(Key, Value),
+
+    %% Calculate the extra memory needed for this term.
+    WordSize = erlang:system_info(wordsize),
+    ExtraMemory = Overhead + 2 * N * WordSize,
+
+    %% Call persistent_term:info/0.
+    {Count,Memory} = info_info(),
+
+    %% There should be one more persistent term.
+    Count = BaseCount + 1,
+
+    %% Verify that the amount of memory is correct.
+    case BaseMemory + ExtraMemory of
+        Memory ->
+            %% Exactly right. The size of the hash table was not changed.
+            ok;
+        Expected ->
+            %% The size of the hash table has been doubled to avoid filling
+            %% the table to more than 50 percent. The previous number
+            %% of entries must have been exactly half the size of the
+            %% hash table. The expected number of extra words added by
+            %% the resizing will be twice that number.
+            ExtraWords = BaseCount * 2,
+            true = ExtraWords * WordSize =:= (Memory - Expected)
+    end,
+    info_wb(N-1, Overhead, {Count,Memory}).
+
+info_info() ->
+    #{count:=Count,memory:=Memory} = persistent_term:info(),
+    true = is_integer(Count) andalso Count >= 0,
+    true = is_integer(Memory) andalso Memory >= 0,
+    {Count,Memory}.
+
+%% Calculate the number of extra bytes needed for storing each term in
+%% the literal, assuming that the key is a tuple of size 2 with
+%% immediate elements. The calculated number is the size of the
+%% ErtsLiteralArea struct excluding the storage for the literal term
+%% itself.
+
+info_literal_area_overhead() ->
+    Key1 = {?MODULE,1},
+    Key2 = {?MODULE,2},
+    #{memory:=Mem0} = persistent_term:info(),
+    ok = persistent_term:put(Key1, literal),
+    #{memory:=Mem1} = persistent_term:info(),
+    ok = persistent_term:put(Key2, literal),
+    #{memory:=Mem2} = persistent_term:info(),
+    true = persistent_term:erase(Key1),
+    true = persistent_term:erase(Key2),
+
+    %% The size of the hash table may have doubled when inserting
+    %% one of the keys. To avoiding counting the change in the hash
+    %% table size, take the smaller size increase.
+    min(Mem2-Mem1, Mem1-Mem0).
+
+%% Test trapping of persistent_term:info/0.
+
+info_trapping(_Config) ->
+    Chk = chk(),
+
+    %% Assume that the info/0 traps after 4000 iterations
+    %% in a non-debug emulator.
+    N = case test_server:timetrap_scale_factor() of
+            1 -> 10000;
+            _ -> 1000
+        end,
+    spawn_link(fun() -> info_trapping_create(N) end),
+    All = do_info_trapping(N, 0),
+    N = info_trapping_check_result(lists:sort(All), 1),
+    erlang:garbage_collect(),
+    info_trapping_erase(N),
+    chk(Chk).
+
+do_info_trapping(N, PrevMem) ->
+    case info_info() of
+        {N,Mem} ->
+            true = Mem >= PrevMem,
+            All = [P || {{?MODULE,{info_trapping,_}},_}=P <- persistent_term:get()],
+            case length(All) of
+                N -> All;
+                _ -> do_info_trapping(N, PrevMem)
+            end;
+        {_,Mem} ->
+            true = Mem >= PrevMem,
+            receive after 1 -> ok end,
+            do_info_trapping(N, Mem)
+    end.
+
+info_trapping_create(0) ->
+    ok;
+info_trapping_create(N) ->
+    ok = persistent_term:put({?MODULE,{info_trapping,N}}, N),
+    info_trapping_create(N-1).
+
+info_trapping_check_result([{{?MODULE,{info_trapping,N}},N}|T], N) ->
+    info_trapping_check_result(T, N+1);
+info_trapping_check_result([], N) -> N-1.
+
+info_trapping_erase(0) ->
+    ok;
+info_trapping_erase(N) ->
+    true = persistent_term:erase({?MODULE,{info_trapping,N}}),
+    info_trapping_erase(N-1).
+
+%% Test that hash tables are deallocated if a process running
+%% persistent_term:get/0 is killed.
+
+killed_while_trapping(_Config) ->
+    Chk = chk(),
+    N = case test_server:timetrap_scale_factor() of
+            1 -> 20000;
+            _ -> 2000
+        end,
+    kwt_put(N),
+    kwt_spawn(10),
+    kwt_erase(N),
+    chk(Chk).
+
+kwt_put(0) ->
+    ok;
+kwt_put(N) ->
+    ok = persistent_term:put({?MODULE,{kwt,N}}, N),
+    kwt_put(N-1).
+
+kwt_spawn(0) ->
+    ok;
+kwt_spawn(N) ->
+    Pids = [spawn(fun kwt_getter/0) || _ <- lists:seq(1, 20)],
+    erlang:yield(),
+    _ = [exit(Pid, kill) || Pid <- Pids],
+    kwt_spawn(N-1).
+
+kwt_getter() ->
+    _ = persistent_term:get(),
+    kwt_getter().
+
+kwt_erase(0) ->
+    ok;
+kwt_erase(N) ->
+    true = persistent_term:erase({?MODULE,{kwt,N}}),
+    kwt_erase(N-1).
+
+%% Test storing off heap values (such as ref-counted binaries).
+
+off_heap_values(_Config) ->
+    Chk = chk(),
+    Key = {?MODULE,?FUNCTION_NAME},
+    Val = {a,list_to_binary(lists:seq(0, 255)),make_ref(),fun() -> ok end},
+    ok = persistent_term:put(Key, Val),
+    FetchedVal = persistent_term:get(Key),
+    Val = FetchedVal,
+    true = persistent_term:erase(Key),
+    off_heap_values_wait(FetchedVal, Val),
+    chk(Chk).
+
+off_heap_values_wait(FetchedVal, Val) ->
+    case erts_debug:size_shared(FetchedVal) of
+        0 ->
+            Val = FetchedVal,
+            ok;
+        _ ->
+            erlang:yield(),
+            off_heap_values_wait(FetchedVal, Val)
+    end.
+
+%% Test some more data types as keys. Use the module name as a key
+%% to minimize the risk of collision with any key used
+%% by the OTP libraries.
+
+keys(_Config) ->
+    Chk = chk(),
+    do_key(?MODULE),
+    do_key([?MODULE]),
+    do_key(?MODULE_STRING),
+    do_key(list_to_binary(?MODULE_STRING)),
+    chk(Chk).
+
+do_key(Key) ->
+    Val = term_to_binary(Key),
+    ok = persistent_term:put(Key, Val),
+    StoredVal = persistent_term:get(Key),
+    Val = StoredVal,
+    true = persistent_term:erase(Key).
+
+%% Create persistent terms with keys that are known to collide.
+%% Delete them in random order, making sure that all others
+%% terms can still be found.
+
+collisions(_Config) ->
+    Chk = chk(),
+
+    %% Create persistent terms with random keys.
+    Keys = lists:flatten(colliding_keys()),
+    Kvs = [{K,rand:uniform(1000)} || K <- Keys],
+    _ = [ok = persistent_term:put(K, V) || {K,V} <- Kvs],
+    _ = [V = persistent_term:get(K) || {K,V} <- Kvs],
+
+    %% Now delete the persistent terms in random order.
+    collisions_delete(lists:keysort(2, Kvs)),
+
+    chk(Chk).
+
+collisions_delete([{Key,Val}|Kvs]) ->
+    Val = persistent_term:get(Key),
+    true = persistent_term:erase(Key),
+    true = lists:sort(persistent_term:get()) =:= lists:sort(Kvs),
+    _ = [V = persistent_term:get(K) || {K,V} <- Kvs],
+    collisions_delete(Kvs);
+collisions_delete([]) ->
+    ok.
+
+colliding_keys() ->
+    %% Collisions found by Jesper L. Andersen for breaking maps.
+    L = [[764492191,2361333849],
+         [49527266765044,90940896816021,20062927283041,267080852079651],
+         [249858369443708,206247021789428,20287304470696,25847120931175],
+         [10645228898670,224705626119556,267405565521452,258214397180678],
+         [264783762221048,166955943492306,98802957003141,102012488332476],
+         [69425677456944,177142907243411,137138950917722,228865047699598],
+         [116031213307147,29203342183358,37406949328742,255198080174323],
+         [200358182338308,235207156008390,120922906095920,116215987197289],
+         [58728890318426,68877471005069,176496507286088,221041411345780],
+         [91094120814795,50665258299931,256093108116737,19777509566621],
+         [74646746200247,98350487270564,154448261001199,39881047281135],
+         [23408943649483,164410325820923,248161749770122,274558342231648],
+         [169531547115055,213630535746863,235098262267796,200508473898303],
+         [235098564415817,85039146398174,51721575960328,173069189684390],
+         [176136386396069,155368359051606,147817099696487,265419485459634],
+         [137542881551462,40028925519736,70525669519846,63445773516557],
+         [173854695142814,114282444507812,149945832627054,99605565798831],
+         [177686773562184,127158716984798,132495543008547],
+         [227073396444896,139667311071766,158915951283562],
+         [26212438434289,94902985796531,198145776057315],
+         [266279278943923,58550737262493,74297973216378],
+         [32373606512065,131854353044428,184642643042326],
+         [34335377662439,85341895822066,273492717750246]],
+
+    %% Verify that the keys still collide (this will fail if the
+    %% internal hash function has been changed).
+    erts_debug:set_internal_state(available_internal_state, true),
+    try
+        case erlang:system_info(wordsize) of
+            8 ->
+                verify_colliding_keys(L);
+            4 ->
+                %% Not guaranteed to collide on a 32-bit system.
+                ok
+        end
+    after
+        erts_debug:set_internal_state(available_internal_state, false)
+    end,
+
+    L.
+
+verify_colliding_keys([[K|Ks]|Gs]) ->
+    Hash = internal_hash(K),
+    [Hash] = lists:usort([internal_hash(Key) || Key <- Ks]),
+    verify_colliding_keys(Gs);
+verify_colliding_keys([]) ->
+    ok.
+
+internal_hash(Term) ->
+    erts_debug:get_internal_state({internal_hash,Term}).
+
+%% Test that all persistent terms are erased by init:restart/0.
+
+init_restart(_Config) ->
+    File = "command_file",
+    ok = file:write_file(File, term_to_binary(restart)),
+    {ok,[[Erl]]} = init:get_argument(progname),
+    ModPath = filename:dirname(code:which(?MODULE)),
+    Cmd = Erl ++ " -pa " ++ ModPath ++ " -noshell "
+        "-run " ++ ?MODULE_STRING ++ " test_init_restart_cmd " ++
+        File,
+    io:format("~s\n", [Cmd]),
+    Expected = "12ok",
+    case os:cmd(Cmd) of
+        Expected ->
+            ok;
+        Actual ->
+            io:format("Expected: ~s", [Expected]),
+            io:format("Actual:   ~s\n", [Actual]),
+            ct:fail(unexpected_output)
+    end.
+
+test_init_restart_cmd([File]) ->
+    try
+        do_test_init_restart_cmd(File)
+    catch
+        C:R ->
+            io:format("\n~p ~p\n", [C,R]),
+            halt()
+    end,
+    receive
+        _ -> ok
+    end.
+
+do_test_init_restart_cmd(File) ->
+    {ok,Bin} = file:read_file(File),
+    Seq = lists:seq(1, 50),
+    case binary_to_term(Bin) of
+        restart ->
+            _ = [persistent_term:put({?MODULE,I}, {value,I}) ||
+                    I <- Seq],
+            ok = file:write_file(File, term_to_binary(was_restarted)),
+            io:put_chars("1"),
+            init:restart(),
+            receive
+                _ -> ok
+            end;
+        was_restarted ->
+            io:put_chars("2"),
+            ok = file:delete(File),
+            _ = [begin
+                     Key = {?MODULE,I},
+                     {'EXIT',{badarg,_}} = (catch persistent_term:get(Key))
+                 end || I <- Seq],
+            io:put_chars("ok"),
+            init:stop()
+    end.
+
+%% Check that there is the same number of persistents terms before
+%% and after each test case.
+
+chk() ->
+    persistent_term:info().
+
+chk(Chk) ->
+    Chk = persistent_term:info(),
+    Key = {?MODULE,?FUNCTION_NAME},
+    ok = persistent_term:put(Key, {term,Chk}),
+    Term = persistent_term:get(Key),
+    true = persistent_term:erase(Key),
+    chk_not_stuck(Term),
+    ok.
+
+chk_not_stuck(Term) ->
+    %% Hash tables to be deleted are put onto a queue.
+    %% Make sure that the queue isn't stuck by a table with
+    %% a non-zero ref count.
+
+    case erts_debug:size_shared(Term) of
+        0 ->
+            erlang:yield(),
+            chk_not_stuck(Term);
+        _ ->
+            ok
+    end.