-rw-r--r-- | erts/emulator/beam/erl_db_hash.c          | 250
-rw-r--r-- | erts/emulator/beam/erl_db_hash.h          |   3
-rw-r--r-- | erts/emulator/beam/erl_gc.c               |  45
-rw-r--r-- | lib/compiler/src/beam_ssa.erl             |  51
-rw-r--r-- | lib/compiler/src/beam_ssa_pre_codegen.erl |  12
-rw-r--r-- | lib/dialyzer/src/dialyzer_cl.erl          | 116
-rw-r--r-- | lib/hipe/doc/src/hipe_app.xml             |   4
-rw-r--r-- | lib/hipe/icode/hipe_beam_to_icode.erl     |  41
-rw-r--r-- | lib/stdlib/test/ets_SUITE.erl             | 134
9 files changed, 405 insertions, 251 deletions
diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c index ceaccf7e44..d80d7985cb 100644 --- a/erts/emulator/beam/erl_db_hash.c +++ b/erts/emulator/beam/erl_db_hash.c @@ -93,11 +93,9 @@ erts_flxctr_dec_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) #define RESET_NITEMS(DB) \ erts_flxctr_reset(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID) -/* - * The following symbols can be manipulated to "tune" the linear hash array - */ + #define GROW_LIMIT(NACTIVE) ((NACTIVE)*1) -#define SHRINK_LIMIT(NACTIVE) ((NACTIVE) / 2) +#define SHRINK_LIMIT(TB) erts_atomic_read_nob(&(TB)->shrink_limit) /* ** We want the first mandatory segment to be small (to reduce minimal footprint) @@ -137,6 +135,11 @@ #define BUCKET(tb, i) SEGTAB(tb)[SLOT_IX_TO_SEG_IX(i)]->buckets[(i) & EXT_SEGSZ_MASK] +#ifdef DEBUG +# define DBG_BUCKET_INACTIVE ((HashDbTerm*)0xdead5107) +#endif + + /* * When deleting a table, the number of records to delete. * Approximate number, because we must delete entire buckets. @@ -377,7 +380,7 @@ typedef int (*extra_match_validator_t)(int keypos, Eterm match, Eterm guard, Ete */ static struct ext_segtab* alloc_ext_segtab(DbTableHash* tb, unsigned seg_ix); static void alloc_seg(DbTableHash *tb); -static int free_seg(DbTableHash *tb, int free_records); +static int free_seg(DbTableHash *tb); static HashDbTerm* next_live(DbTableHash *tb, Uint *iptr, erts_rwmtx_t** lck_ptr, HashDbTerm *list); static HashDbTerm* search_list(DbTableHash* tb, Eterm key, @@ -471,10 +474,8 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle); static ERTS_INLINE void try_shrink(DbTableHash* tb) { - int nactive = NACTIVE(tb); int nitems = NITEMS(tb); - if (nactive > FIRST_SEGSZ && nitems < SHRINK_LIMIT(nactive) - && !IS_FIXED(tb)) { + if (nitems < SHRINK_LIMIT(tb) && !IS_FIXED(tb)) { shrink(tb, nitems); } } @@ -685,6 +686,7 @@ int db_create_hash(Process *p, DbTable *tbl) erts_atomic_init_nob(&tb->szm, FIRST_SEGSZ_MASK); erts_atomic_init_nob(&tb->nactive, FIRST_SEGSZ); + erts_atomic_init_nob(&tb->shrink_limit, 0); erts_atomic_init_nob(&tb->fixdel, (erts_aint_t)NULL); erts_atomic_init_nob(&tb->segtab, (erts_aint_t)NULL); SET_SEGTAB(tb, tb->first_segtab); @@ -771,7 +773,7 @@ static int db_next_hash(Process *p, DbTable *tbl, Eterm key, Eterm *ret) b = next_live(tb, &ix, &lck, b->next); if (tb->common.status & (DB_BAG | DB_DUPLICATE_BAG)) { while (b != 0) { - if (!has_live_key(tb, b, key, hval)) { + if (!has_key(tb, b, key, hval)) { break; } b = next_live(tb, &ix, &lck, b->next); @@ -781,6 +783,7 @@ static int db_next_hash(Process *p, DbTable *tbl, Eterm key, Eterm *ret) *ret = am_EOT; } else { + ASSERT(!is_pseudo_deleted(b)); *ret = db_copy_key(p, tbl, &b->dbterm); RUNLOCK_HASH(lck); } @@ -2466,7 +2469,7 @@ static SWord db_free_table_continue_hash(DbTable *tbl, SWord reds) erts_atomic_set_relb(&tb->fixdel, (erts_aint_t)NULL); while(tb->nslots != 0) { - reds -= EXT_SEGSZ/64 + free_seg(tb, 1); + reds -= EXT_SEGSZ/64 + free_seg(tb); /* * If we have done enough work, get out here. @@ -2664,6 +2667,34 @@ static struct ext_segtab* alloc_ext_segtab(DbTableHash* tb, unsigned seg_ix) return est; } +static void calc_shrink_limit(DbTableHash* tb) +{ + erts_aint_t shrink_limit; + + if (tb->nslots >= (FIRST_SEGSZ + 2*EXT_SEGSZ)) { + /* + * Start shrink when we can remove one extra segment + * and still remain below 50% load. + */ + shrink_limit = (tb->nslots - EXT_SEGSZ) / 2; + } + else { + /* + * But don't shrink below two segments. + * Why? 
In order to have chance of getting rid of the last extra segment, + * and rehash it into the first small segment, we either have to start + * early and do speculative joining of buckets or we have to join a lot + * of buckets during each delete-op. + * + * Instead keep segment #2 once allocated. I also think it's a good bet + * a shrinking large table will grow large again. + */ + shrink_limit = 0; + } + erts_atomic_set_nob(&tb->shrink_limit, shrink_limit); +} + + /* Extend table with one new segment */ static void alloc_seg(DbTableHash *tb) @@ -2682,8 +2713,17 @@ static void alloc_seg(DbTableHash *tb) segtab[seg_ix] = (struct segment*) erts_db_alloc(ERTS_ALC_T_DB_SEG, (DbTable *) tb, SIZEOF_SEGMENT(EXT_SEGSZ)); - sys_memset(segtab[seg_ix], 0, SIZEOF_SEGMENT(EXT_SEGSZ)); +#ifdef DEBUG + { + int i; + for (i = 0; i < EXT_SEGSZ; i++) { + segtab[seg_ix]->buckets[i] = DBG_BUCKET_INACTIVE; + } + } +#endif tb->nslots += EXT_SEGSZ; + + calc_shrink_limit(tb); } static void dealloc_ext_segtab(void* lop_data) @@ -2693,10 +2733,19 @@ static void dealloc_ext_segtab(void* lop_data) erts_free(ERTS_ALC_T_DB_SEG, est); } -/* Shrink table by freeing the top segment +struct dealloc_seg_ops { + struct segment* segp; + Uint seg_sz; + + struct ext_segtab* est; +}; + +/* Shrink table by removing the top segment ** free_records: 1=free any records in segment, 0=assume segment is empty +** ds_ops: (out) Instructions for dealloc_seg(). */ -static int free_seg(DbTableHash *tb, int free_records) +static int remove_seg(DbTableHash *tb, int free_records, + struct dealloc_seg_ops *ds_ops) { const int seg_ix = SLOT_IX_TO_SEG_IX(tb->nslots) - 1; struct segment** const segtab = SEGTAB(tb); @@ -2704,24 +2753,47 @@ static int free_seg(DbTableHash *tb, int free_records) Uint seg_sz; int nrecords = 0; + ERTS_LC_ASSERT(IS_TAB_WLOCKED(tb) || tb->common.status & DB_DELETE + || erts_atomic_read_nob(&tb->is_resizing)); + ASSERT(segp != NULL); -#ifndef DEBUG - if (free_records) -#endif - { - int i = (seg_ix == 0) ? FIRST_SEGSZ : EXT_SEGSZ; - while (i--) { - HashDbTerm* p = segp->buckets[i]; + if (free_records) { + int ix, n; + if (seg_ix == 0) { + /* First segment (always fully active) */ + n = FIRST_SEGSZ; + ix = FIRST_SEGSZ-1; + } + else if (NACTIVE(tb) < tb->nslots) { + /* Last extended segment partially active */ + n = (NACTIVE(tb) - FIRST_SEGSZ) & EXT_SEGSZ_MASK; + ix = (NACTIVE(tb)-1) & EXT_SEGSZ_MASK; + } + else { + /* Full extended segment */ + n = EXT_SEGSZ; + ix = EXT_SEGSZ - 1; + } + for ( ; n > 0; n--, ix--) { + HashDbTerm* p = segp->buckets[ix & EXT_SEGSZ_MASK]; while(p != 0) { HashDbTerm* nxt = p->next; - ASSERT(free_records); /* segment not empty as assumed? */ free_term(tb, p); p = nxt; ++nrecords; } } } - +#ifdef DEBUG + else { + int ix = (seg_ix == 0) ? FIRST_SEGSZ-1 : EXT_SEGSZ-1; + for ( ; ix >= 0; ix--) { + ASSERT(segp->buckets[ix] == DBG_BUCKET_INACTIVE); + } + } +#endif + + ds_ops->est = NULL; if (seg_ix >= NSEG_1) { struct ext_segtab* est = ErtsContainerStruct_(segtab,struct ext_segtab,segtab); @@ -2730,35 +2802,64 @@ static int free_seg(DbTableHash *tb, int free_records) SET_SEGTAB(tb, est->prev_segtab); tb->nsegs = est->prev_nsegs; - if (!tb->common.is_thread_safe) { - /* - * Table is doing a graceful shrink operation and we must avoid - * deallocating this segtab while it may still be read by other - * threads. Schedule deallocation with thread progress to make - * sure no lingering threads are still hanging in BUCKET macro - * with an old segtab pointer. 
- */ - erts_schedule_db_free(&tb->common, dealloc_ext_segtab, - est, &est->lop, - SIZEOF_EXT_SEGTAB(est->nsegs)); - } - else - erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable*)tb, est, - SIZEOF_EXT_SEGTAB(est->nsegs)); + ds_ops->est = est; } } + seg_sz = (seg_ix == 0) ? FIRST_SEGSZ : EXT_SEGSZ; - erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable *)tb, segp, SIZEOF_SEGMENT(seg_sz)); + tb->nslots -= seg_sz; + ASSERT(tb->nslots >= 0); + + ds_ops->segp = segp; + ds_ops->seg_sz = seg_sz; #ifdef DEBUG if (seg_ix < tb->nsegs) SEGTAB(tb)[seg_ix] = NULL; #endif - tb->nslots -= seg_sz; - ASSERT(tb->nslots >= 0); + calc_shrink_limit(tb); return nrecords; } +/* + * Deallocate segment removed by remove_seg() + */ +static void dealloc_seg(DbTableHash *tb, struct dealloc_seg_ops* ds_ops) +{ + struct ext_segtab* est = ds_ops->est; + + if (est) { + if (!tb->common.is_thread_safe) { + /* + * Table is doing a graceful shrink operation and we must avoid + * deallocating this segtab while it may still be read by other + * threads. Schedule deallocation with thread progress to make + * sure no lingering threads are still hanging in BUCKET macro + * with an old segtab pointer. + */ + erts_schedule_db_free(&tb->common, dealloc_ext_segtab, + est, &est->lop, + SIZEOF_EXT_SEGTAB(est->nsegs)); + } + else + erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable*)tb, est, + SIZEOF_EXT_SEGTAB(est->nsegs)); + } + + erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable *)tb, + ds_ops->segp, SIZEOF_SEGMENT(ds_ops->seg_sz)); +} + +/* Remove and deallocate top segment and all its contained objects */ +static int free_seg(DbTableHash *tb) +{ + struct dealloc_seg_ops ds_ops; + int reds; + + reds = remove_seg(tb, 1, &ds_ops); + dealloc_seg(tb, &ds_ops); + return reds; +} /* ** Copy terms from ptr1 until ptr2 @@ -2880,6 +2981,7 @@ static void grow(DbTableHash* tb, int nitems) pnext = &BUCKET(tb, from_ix); p = *pnext; to_pnext = &BUCKET(tb, to_ix); + ASSERT(*to_pnext == DBG_BUCKET_INACTIVE); while (p != NULL) { if (is_pseudo_deleted(p)) { /* rare but possible with fine locking */ *pnext = p->next; @@ -2916,19 +3018,21 @@ abort: */ static void shrink(DbTableHash* tb, int nitems) { - HashDbTerm** src_bp; - HashDbTerm** dst_bp; + struct dealloc_seg_ops ds_ops; + HashDbTerm* src; + HashDbTerm* tail; HashDbTerm** bp; erts_rwmtx_t* lck; int src_ix, dst_ix, low_szm; int nactive; int loop_limit = 5; + ds_ops.segp = NULL; do { if (!begin_resizing(tb)) return; /* already in progress */ nactive = NACTIVE(tb); - if (!(nactive > FIRST_SEGSZ && nitems < SHRINK_LIMIT(nactive))) { + if (!(nitems < SHRINK_LIMIT(tb))) { goto abort; /* already done (race) */ } src_ix = nactive - 1; @@ -2945,41 +3049,49 @@ static void shrink(DbTableHash* tb, int nitems) goto abort; } - src_bp = &BUCKET(tb, src_ix); - dst_bp = &BUCKET(tb, dst_ix); - bp = src_bp; - - /* - * We join lists by appending "dst" at the end of "src" - * as we must step through "src" anyway to purge pseudo deleted. 
- */ - while(*bp != NULL) { - if (is_pseudo_deleted(*bp)) { - HashDbTerm* deleted = *bp; - *bp = deleted->next; - free_term(tb, deleted); - } else { - bp = &(*bp)->next; - } - } - *bp = *dst_bp; - *dst_bp = *src_bp; - *src_bp = NULL; - + src = BUCKET(tb, src_ix); +#ifdef DEBUG + BUCKET(tb, src_ix) = DBG_BUCKET_INACTIVE; +#endif nactive = src_ix; erts_atomic_set_nob(&tb->nactive, nactive); if (dst_ix == 0) { erts_atomic_set_relb(&tb->szm, low_szm); } - WUNLOCK_HASH(lck); - if (tb->nslots - src_ix >= EXT_SEGSZ) { - free_seg(tb, 0); + remove_seg(tb, 0, &ds_ops); } done_resizing(tb); - } while (--loop_limit - && nactive > FIRST_SEGSZ && nitems < SHRINK_LIMIT(nactive)); + if (src) { + /* + * We join buckets by appending "dst" list at the end of "src" list + * as we must step through "src" anyway to purge pseudo deleted. + */ + bp = &BUCKET(tb, dst_ix); + tail = *bp; + *bp = src; + + while(*bp != NULL) { + if (is_pseudo_deleted(*bp)) { + HashDbTerm* deleted = *bp; + *bp = deleted->next; + free_term(tb, deleted); + } else { + bp = &(*bp)->next; + } + } + *bp = tail; + } + + WUNLOCK_HASH(lck); + + if (ds_ops.segp) { + dealloc_seg(tb, &ds_ops); + ds_ops.segp = NULL; + } + + } while (--loop_limit && nitems < SHRINK_LIMIT(tb)); return; abort: diff --git a/erts/emulator/beam/erl_db_hash.h b/erts/emulator/beam/erl_db_hash.h index eae5537ba4..ecd2ca74a1 100644 --- a/erts/emulator/beam/erl_db_hash.h +++ b/erts/emulator/beam/erl_db_hash.h @@ -63,9 +63,10 @@ typedef struct db_table_hash_fine_locks { typedef struct db_table_hash { DbTableCommon common; - /* SMP: szm and nactive are write-protected by is_resizing or table write lock */ + /* szm, nactive, shrink_limit are write-protected by is_resizing or table write lock */ erts_atomic_t szm; /* current size mask. */ erts_atomic_t nactive; /* Number of "active" slots */ + erts_atomic_t shrink_limit; /* Shrink table when fewer objects than this */ erts_atomic_t segtab; /* The segment table (struct segment**) */ struct segment* first_segtab[1]; diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index 67a73e4d57..7ab8034606 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -151,6 +151,7 @@ static void grow_new_heap(Process *p, Uint new_sz, Eterm* objv, int nobj); static void sweep_off_heap(Process *p, int fullsweep); static void offset_heap(Eterm* hp, Uint sz, Sint offs, char* area, Uint area_size); static void offset_heap_ptr(Eterm* hp, Uint sz, Sint offs, char* area, Uint area_size); +static void offset_heap_ptr_nstack(Eterm* hp, Uint sz, Sint offs, char* area, Uint area_size); static void offset_rootset(Process *p, Sint offs, char* area, Uint area_size, Eterm* objv, int nobj); static void offset_off_heap(Process* p, Sint offs, char* area, Uint area_size); @@ -1054,9 +1055,10 @@ erts_garbage_collect_hibernate(Process* p) n_htop = tmp_n_htop; \ } while(0) + /* * offset_nstack() can ignore the descriptor-based traversal the other - * nstack procedures use and simply call offset_heap_ptr() instead. + * nstack procedures use and do a simpler word by word traversal instead. * This relies on two facts: * 1. The only live non-Erlang terms on an nstack are return addresses, * and they will be skipped thanks to the low/high range check. 
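The calc_shrink_limit()/try_shrink() change above replaces the old on-the-fly NACTIVE/2 test with a limit that is recomputed whenever a segment is allocated or removed: shrink only when the table could drop one extra segment and still stay below 50% load, and never shrink away segment #2 once it has been allocated. The standalone C sketch below only illustrates that arithmetic; the FIRST_SEGSZ and EXT_SEGSZ values are assumed for the example, the real ones are defined in erl_db_hash.h.

#include <stdio.h>

#define FIRST_SEGSZ  256   /* assumed size of the small mandatory first segment */
#define EXT_SEGSZ   2048   /* assumed size of each extra segment */

/* Mirrors the calc_shrink_limit() logic above: shrink only when dropping
 * one extra segment still leaves the table below 50% load, and keep at
 * least the first segment plus one extra segment once allocated. */
static long calc_shrink_limit(long nslots)
{
    if (nslots >= FIRST_SEGSZ + 2 * EXT_SEGSZ)
        return (nslots - EXT_SEGSZ) / 2;
    return 0;
}

int main(void)
{
    long nslots;
    for (nslots = FIRST_SEGSZ; nslots <= FIRST_SEGSZ + 4 * EXT_SEGSZ;
         nslots += EXT_SEGSZ) {
        long limit = calc_shrink_limit(nslots);
        if (limit == 0)
            printf("nslots=%5ld  never shrinks (keeps its current segments)\n",
                   nslots);
        else
            printf("nslots=%5ld  shrink when nitems < %4ld "
                   "(below 50%% load after dropping one segment)\n",
                   nslots, limit);
    }
    return 0;
}

With the limit precomputed, try_shrink() reduces to a single atomic read and compare, nitems < SHRINK_LIMIT(tb), which is why the value is stored in the new shrink_limit field of DbTableHash.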
@@ -1071,14 +1073,51 @@ static ERTS_INLINE void offset_nstack(Process* p, Sint offs, { if (p->hipe.nstack) { ASSERT(p->hipe.nsp && p->hipe.nstend); - offset_heap_ptr(hipe_nstack_start(p), hipe_nstack_used(p), - offs, area, area_size); + offset_heap_ptr_nstack(hipe_nstack_start(p), hipe_nstack_used(p), + offs, area, area_size); } else { ASSERT(!p->hipe.nsp && !p->hipe.nstend); } } +/* + * This is the same as offset_heap_ptr() + * + * Except for VALGRIND. It allows benign offsetting of undefined (dead) words + * on the nstack while also retaining them as undefined. This suppresses + * valgrinds "Conditional jump or move depends on uninitialised value(s)". + */ +static void +offset_heap_ptr_nstack(Eterm* hp, Uint sz, Sint offs, + char* area, Uint area_size) +{ + while (sz--) { + Eterm val = *hp; +#ifdef VALGRIND + Eterm val_vbits; + VALGRIND_GET_VBITS(&val, &val_vbits, sizeof(val)); + VALGRIND_MAKE_MEM_DEFINED(&val, sizeof(val)); +#endif + switch (primary_tag(val)) { + case TAG_PRIMARY_LIST: + case TAG_PRIMARY_BOXED: + if (ErtsInArea(ptr_val(val), area, area_size)) { +#ifdef VALGRIND + VALGRIND_SET_VBITS(&val, val_vbits, sizeof(val)); +#endif + *hp = offset_ptr(val, offs); + } + hp++; + break; + default: + hp++; + break; + } + } +} + + #else /* !HIPE */ #define fullsweep_nstack(p,n_htop) (n_htop) diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl index f46cca1431..77619368c7 100644 --- a/lib/compiler/src/beam_ssa.erl +++ b/lib/compiler/src/beam_ssa.erl @@ -21,7 +21,7 @@ -module(beam_ssa). -export([add_anno/3,get_anno/2,get_anno/3, - clobbers_xregs/1,def/2,def_used/2, + clobbers_xregs/1,def/2,def_unused/3, definitions/1, dominators/1,common_dominators/3, flatmapfold_instrs_rpo/4, @@ -124,7 +124,7 @@ 'put_tuple_element' | 'put_tuple_elements' | 'set_tuple_element'. --import(lists, [foldl/3,keyfind/3,mapfoldl/3,member/2,reverse/1,umerge/1]). +-import(lists, [foldl/3,keyfind/3,mapfoldl/3,member/2,reverse/1]). -spec add_anno(Key, Value, Construct) -> Construct when Key :: atom(), @@ -320,17 +320,18 @@ def(Ls, Blocks) -> Blks = [map_get(L, Blocks) || L <- Top], def_1(Blks, []). --spec def_used(Ls, Blocks) -> {Def,Used} when +-spec def_unused(Ls, Used, Blocks) -> {Def,Unused} when Ls :: [label()], + Used :: ordsets:ordset(var_name()), Blocks :: block_map(), Def :: ordsets:ordset(var_name()), - Used :: ordsets:ordset(var_name()). + Unused :: ordsets:ordset(var_name()). -def_used(Ls, Blocks) -> +def_unused(Ls, Unused, Blocks) -> Top = rpo(Ls, Blocks), Blks = [map_get(L, Blocks) || L <- Top], Preds = cerl_sets:from_list(Top), - def_used_1(Blks, Preds, [], []). + def_unused_1(Blks, Preds, [], Unused). %% dominators(BlockMap) -> {Dominators,Numbering}. %% Calculate the dominator tree, returning a map where each entry @@ -652,34 +653,28 @@ is_commutative('=/=') -> true; is_commutative('/=') -> true; is_commutative(_) -> false. -def_used_1([#b_blk{is=Is,last=Last}|Bs], Preds, Def0, UsedAcc) -> - {Def,Used} = def_used_is(Is, Preds, Def0, used(Last)), - case Used of - [] -> - def_used_1(Bs, Preds, Def, UsedAcc); - [_|_] -> - def_used_1(Bs, Preds, Def, [Used|UsedAcc]) - end; -def_used_1([], _Preds, Def0, UsedAcc) -> - Def = ordsets:from_list(Def0), - Used = umerge(UsedAcc), - {Def,Used}. +def_unused_1([#b_blk{is=Is,last=Last}|Bs], Preds, Def0, Unused0) -> + Unused1 = ordsets:subtract(Unused0, used(Last)), + {Def,Unused} = def_unused_is(Is, Preds, Def0, Unused1), + def_unused_1(Bs, Preds, Def, Unused); +def_unused_1([], _Preds, Def, Unused) -> + {ordsets:from_list(Def), Unused}. 
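offset_heap_ptr_nstack() above exists so that, under Valgrind, dead (undefined) words on the HiPE nstack can be offset without triggering "Conditional jump or move depends on uninitialised value(s)" while still being kept undefined. The sketch below shows that Memcheck V-bits save/restore idiom in isolation; it is an illustration, not OTP code, and the offset_word()/area names are invented for the example. It builds with or without the Valgrind headers (compile with -DVALGRIND when they are available) and behaves as plain pointer offsetting when not run under Memcheck.

#include <stdint.h>
#include <stdio.h>

#ifdef VALGRIND
#  include <valgrind/memcheck.h>
#else  /* stubs so the sketch also builds without the Valgrind headers */
#  define VALGRIND_GET_VBITS(a, v, n)      ((void)(v))
#  define VALGRIND_MAKE_MEM_DEFINED(a, n)  ((void)(a))
#  define VALGRIND_SET_VBITS(a, v, n)      ((void)(v))
#endif

/* Add 'offs' to *wp if it points into [area, area+size), saving and
 * restoring the word's V-bits so an undefined (dead) word stays
 * undefined instead of being reported or silently made "defined". */
static void offset_word(uintptr_t *wp, uintptr_t offs,
                        char *area, size_t size)
{
    uintptr_t val = *wp;
    uintptr_t vbits = 0;

    VALGRIND_GET_VBITS(&val, &vbits, sizeof(val));      /* save V-bits      */
    VALGRIND_MAKE_MEM_DEFINED(&val, sizeof(val));       /* silence Memcheck */

    if ((uintptr_t)(val - (uintptr_t)area) < size) {
        VALGRIND_SET_VBITS(&val, &vbits, sizeof(val));  /* restore V-bits   */
        *wp = val + offs;
    }
    /* A word that is not rewritten keeps its original V-bits in *wp. */
}

int main(void)
{
    char area[64];
    uintptr_t w = (uintptr_t)&area[8];   /* pretend in-area pointer */

    offset_word(&w, 16, area, sizeof(area));
    printf("relocated to %p\n", (void *)w);
    return 0;
}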
-def_used_is([#b_set{op=phi,dst=Dst,args=Args}|Is], - Preds, Def0, Used0) -> +def_unused_is([#b_set{op=phi,dst=Dst,args=Args}|Is], + Preds, Def0, Unused0) -> Def = [Dst|Def0], %% We must be careful to only include variables that will %% be used when arriving from one of the predecessor blocks %% in Preds. - Used1 = [V || {#b_var{}=V,L} <- Args, cerl_sets:is_element(L, Preds)], - Used = ordsets:union(ordsets:from_list(Used1), Used0), - def_used_is(Is, Preds, Def, Used); -def_used_is([#b_set{dst=Dst}=I|Is], Preds, Def0, Used0) -> + Unused1 = [V || {#b_var{}=V,L} <- Args, cerl_sets:is_element(L, Preds)], + Unused = ordsets:subtract(Unused0, ordsets:from_list(Unused1)), + def_unused_is(Is, Preds, Def, Unused); +def_unused_is([#b_set{dst=Dst}=I|Is], Preds, Def0, Unused0) -> Def = [Dst|Def0], - Used = ordsets:union(used(I), Used0), - def_used_is(Is, Preds, Def, Used); -def_used_is([], _Preds, Def, Used) -> - {Def,Used}. + Unused = ordsets:subtract(Unused0, used(I)), + def_unused_is(Is, Preds, Def, Unused); +def_unused_is([], _Preds, Def, Unused) -> + {Def,Unused}. def_1([#b_blk{is=Is}|Bs], Def0) -> Def = def_is(Is, Def0), diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index d3cedc3617..7022b1d316 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -1488,9 +1488,9 @@ recv_common(_Defs, none, _Blocks) -> %% in the tail position of a function. []; recv_common(Defs, Exit, Blocks) -> - {ExitDefs,ExitUsed} = beam_ssa:def_used([Exit], Blocks), + {ExitDefs,ExitUnused} = beam_ssa:def_unused([Exit], Defs, Blocks), Def = ordsets:subtract(Defs, ExitDefs), - ordsets:intersection(Def, ExitUsed). + ordsets:subtract(Def, ExitUnused). %% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel], %% Blocks0, Count0) -> {Blocks,Count}. @@ -1544,9 +1544,9 @@ exit_predecessors([], _Exit, _Blocks) -> []. %% later used within a clause of the receive. fix_receive([L|Ls], Defs, Blocks0, Count0) -> - {RmDefs,Used0} = beam_ssa:def_used([L], Blocks0), + {RmDefs,Unused} = beam_ssa:def_unused([L], Defs, Blocks0), Def = ordsets:subtract(Defs, RmDefs), - Used = ordsets:intersection(Def, Used0), + Used = ordsets:subtract(Def, Unused), {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Used], Count0), Ren = zip(Used, NewVars), Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0), @@ -2172,8 +2172,8 @@ reserve_yregs(#st{frames=Frames}=St0) -> reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) -> Blk = map_get(L, Blocks0), Yregs = beam_ssa:get_anno(yregs, Blk), - {Def,Used} = beam_ssa:def_used([L], Blocks0), - UsedYregs = ordsets:intersection(Yregs, Used), + {Def,Unused} = beam_ssa:def_unused([L], Yregs, Blocks0), + UsedYregs = ordsets:subtract(Yregs, Unused), DefBefore = ordsets:subtract(UsedYregs, Def), {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, Blocks0, Count0), InsideVars = ordsets:subtract(UsedYregs, DefBefore), diff --git a/lib/dialyzer/src/dialyzer_cl.erl b/lib/dialyzer/src/dialyzer_cl.erl index 403fcb6279..5e680062fb 100644 --- a/lib/dialyzer/src/dialyzer_cl.erl +++ b/lib/dialyzer/src/dialyzer_cl.erl @@ -320,12 +320,6 @@ report_analysis_start(#options{analysis_type = Type, end end. -report_native_comp(#options{report_mode = ReportMode}) -> - case ReportMode of - quiet -> ok; - _ -> io:format(" Compiling some key modules to native code...") - end. 
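The beam_ssa change above inverts the old def_used/2: instead of accumulating the full used-set of every traversed block and letting callers intersect it with some candidate set, def_unused/3 threads the candidate set through the traversal and subtracts whatever is used, so the working set never grows beyond the candidates. The C program below is only an illustration of that set algebra (beam_ssa itself works on ordsets of variables, not bitmasks): both formulations give the same answer, which is why the callers in beam_ssa_pre_codegen.erl can switch from ordsets:intersection/2 to ordsets:subtract/2.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* "Used" masks of three instructions in the traversed blocks. */
    const uint64_t insn_uses[] = { 0x03, 0x0c, 0x30 };
    const uint64_t candidates  = 0x2a;   /* the variables we care about */
    uint64_t used_all = 0, unused = candidates;
    size_t i;

    for (i = 0; i < sizeof(insn_uses) / sizeof(insn_uses[0]); i++) {
        used_all |= insn_uses[i];    /* old def_used: grow a used set        */
        unused   &= ~insn_uses[i];   /* new def_unused: shrink the candidates */
    }

    /* Callers previously computed "candidates intersect used"; with the
     * new API they compute "candidates minus unused".  The two agree. */
    printf("intersection = %#llx, candidates minus unused = %#llx\n",
           (unsigned long long)(candidates & used_all),
           (unsigned long long)(candidates & ~unused));
    return 0;
}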
- report_elapsed_time(T1, T2, #options{report_mode = ReportMode}) -> case ReportMode of quiet -> ok; @@ -375,7 +369,6 @@ do_analysis(Options) -> do_analysis(Files, Options, Plt, PltInfo) -> assert_writable(Options#options.output_plt), - hipe_compile(Files, Options), report_analysis_start(Options), State0 = new_state(), State1 = init_output(State0, Options), @@ -484,115 +477,6 @@ expand_dependent_modules_1([Mod|Mods], Included, ModDeps) -> expand_dependent_modules_1([], Included, _ModDeps) -> Included. --define(MIN_PARALLELISM, 7). --define(MIN_FILES_FOR_NATIVE_COMPILE, 20). - --spec hipe_compile([file:filename()], #options{}) -> 'ok'. - -hipe_compile(Files, #options{erlang_mode = ErlangMode, - native = Native, - native_cache = NativeCache} = Options) -> - NoNative = - case ErlangMode of - true -> - %% In Erlang mode, native compilation must be explicitly enabled - Native =/= true; - false -> - %% In CLI mode, perform native compilation unless disabled - Native =:= false - end, - FewFiles = (length(Files) < ?MIN_FILES_FOR_NATIVE_COMPILE), - case NoNative orelse FewFiles of - true -> ok; - false -> - case erlang:system_info(hipe_architecture) of - undefined -> ok; - _ -> - Mods = [lists, dict, digraph, digraph_utils, ets, - gb_sets, gb_trees, ordsets, sets, sofs, - cerl, erl_types, cerl_trees, erl_bif_types, - dialyzer_analysis_callgraph, dialyzer, dialyzer_behaviours, - dialyzer_codeserver, dialyzer_contracts, - dialyzer_coordinator, dialyzer_dataflow, dialyzer_dep, - dialyzer_plt, dialyzer_succ_typings, dialyzer_typesig, - dialyzer_worker], - report_native_comp(Options), - {T1, _} = statistics(wall_clock), - native_compile(Mods, NativeCache), - {T2, _} = statistics(wall_clock), - report_elapsed_time(T1, T2, Options) - end - end. - -native_compile(Mods, Cache) -> - case dialyzer_utils:parallelism() > ?MIN_PARALLELISM of - true -> - Parent = self(), - Pids = [spawn(fun () -> Parent ! {self(), hc(M, Cache)} end) || M <- Mods], - lists:foreach(fun (Pid) -> receive {Pid, Res} -> Res end end, Pids); - false -> - lists:foreach(fun (Mod) -> hc(Mod, Cache) end, Mods) - end. - -hc(Mod, Cache) -> - {module, Mod} = code:ensure_loaded(Mod), - case code:is_module_native(Mod) of - true -> ok; - false -> - %% io:format(" ~w", [Mod]), - case Cache of - false -> - {ok, Mod} = hipe:c(Mod), - ok; - true -> - hc_cache(Mod) - end - end. - -hc_cache(Mod) -> - CacheBase = cache_base_dir(), - %% Use HiPE architecture, version and erts checksum in directory name, - %% to avoid clashes between incompatible binaries. - HipeArchVersion = - lists:concat( - [erlang:system_info(hipe_architecture), "-", - hipe:version(), "-", - hipe:erts_checksum()]), - CacheDir = filename:join(CacheBase, HipeArchVersion), - OrigBeamFile = code:which(Mod), - {ok, {Mod, <<Checksum:128>>}} = beam_lib:md5(OrigBeamFile), - CachedBeamFile = filename:join(CacheDir, lists:concat([Mod, "-", Checksum, ".beam"])), - ok = filelib:ensure_dir(CachedBeamFile), - ModBin = - case filelib:is_file(CachedBeamFile) of - true -> - {ok, BinFromFile} = file:read_file(CachedBeamFile), - BinFromFile; - false -> - {ok, Mod, CompiledBin} = compile:file(OrigBeamFile, [from_beam, native, binary]), - ok = file:write_file(CachedBeamFile, CompiledBin), - CompiledBin - end, - code:unstick_dir(filename:dirname(OrigBeamFile)), - {module, Mod} = code:load_binary(Mod, CachedBeamFile, ModBin), - true = code:is_module_native(Mod), - ok. 
- -cache_base_dir() -> - %% http://standards.freedesktop.org/basedir-spec/basedir-spec-0.7.html - %% If XDG_CACHE_HOME is set to an absolute path, use it as base. - XdgCacheHome = os:getenv("XDG_CACHE_HOME"), - CacheHome = - case is_list(XdgCacheHome) andalso filename:pathtype(XdgCacheHome) =:= absolute of - true -> - XdgCacheHome; - false -> - %% Otherwise, the default is $HOME/.cache. - {ok, [[Home]]} = init:get_argument(home), - filename:join(Home, ".cache") - end, - filename:join([CacheHome, "dialyzer_hipe_cache"]). - new_state() -> #cl_state{}. diff --git a/lib/hipe/doc/src/hipe_app.xml b/lib/hipe/doc/src/hipe_app.xml index 61d92fdffe..5ac445ac58 100644 --- a/lib/hipe/doc/src/hipe_app.xml +++ b/lib/hipe/doc/src/hipe_app.xml @@ -66,6 +66,10 @@ <item><p>The HiPE compiler will crash on modules containing binary matching.</p> </item> + <tag>try/catch</tag> + <item><p>The HiPE compiler will crash on modules containing 'try' or + 'catch'.</p> + </item> <tag>Stack traces</tag> <item><p>Stack traces returned from <seealso marker="erts:erlang#get_stacktrace/0"> diff --git a/lib/hipe/icode/hipe_beam_to_icode.erl b/lib/hipe/icode/hipe_beam_to_icode.erl index 1246af1da3..fce178a5e3 100644 --- a/lib/hipe/icode/hipe_beam_to_icode.erl +++ b/lib/hipe/icode/hipe_beam_to_icode.erl @@ -557,32 +557,21 @@ trans_fun([{move,Src,Dst}|Instructions], Env) -> Dst1 = mk_var(Dst), Src1 = trans_arg(Src), [hipe_icode:mk_move(Dst1,Src1) | trans_fun(Instructions,Env)]; -%%--- catch --- ITS PROCESSING IS POSTPONED -trans_fun([{'catch',N,{_,EndLabel}}|Instructions], Env) -> - NewContLbl = mk_label(new), - [{'catch',N,EndLabel},NewContLbl | trans_fun(Instructions,Env)]; -%%--- catch_end --- ITS PROCESSING IS POSTPONED -trans_fun([{catch_end,_N}=I|Instructions], Env) -> - [I | trans_fun(Instructions,Env)]; -%%--- try --- ITS PROCESSING IS POSTPONED -trans_fun([{'try',N,{_,EndLabel}}|Instructions], Env) -> - NewContLbl = mk_label(new), - [{'try',N,EndLabel},NewContLbl | trans_fun(Instructions,Env)]; -%%--- try_end --- -trans_fun([{try_end,_N}|Instructions], Env) -> - [hipe_icode:mk_end_try() | trans_fun(Instructions,Env)]; -%%--- try_case --- ITS PROCESSING IS POSTPONED -trans_fun([{try_case,_N}=I|Instructions], Env) -> - [I | trans_fun(Instructions,Env)]; -%%--- try_case_end --- -trans_fun([{try_case_end,Arg}|Instructions], Env) -> - BadArg = trans_arg(Arg), - ErrVar = mk_var(new), - Vs = [mk_var(new)], - Atom = hipe_icode:mk_move(ErrVar,hipe_icode:mk_const(try_clause)), - Tuple = hipe_icode:mk_primop(Vs,mktuple,[ErrVar,BadArg]), - Fail = hipe_icode:mk_fail(Vs,error), - [Atom,Tuple,Fail | trans_fun(Instructions,Env)]; +%% +%% try/catch -- THESE ARE KNOWN TO MISCOMPILE, SEE OTP-15949 +%% +trans_fun([{'catch'=Name,_,_}|_], _Env) -> + nyi(Name); +trans_fun([{catch_end=Name,_}|_], _Env) -> + nyi(Name); +trans_fun([{'try'=Name,_,_}|_], _Env) -> + nyi(Name); +trans_fun([{try_end=Name,_}|_], _Env) -> + nyi(Name); +trans_fun([{try_case=Name,_}|_], _Env) -> + nyi(Name); +trans_fun([{try_case_end=Name,_}|_], _Env) -> + nyi(Name); %%--- raise --- trans_fun([{raise,{f,0},[Reg1,Reg2],{x,0}}|Instructions], Env) -> V1 = trans_arg(Reg1), diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl index 09238ae2b4..05893a92b0 100644 --- a/lib/stdlib/test/ets_SUITE.erl +++ b/lib/stdlib/test/ets_SUITE.erl @@ -46,7 +46,8 @@ test_delete_table_while_size_snapshot/1, test_delete_table_while_size_snapshot_helper/0]). 
-export([ordered/1, ordered_match/1, interface_equality/1, - fixtable_next/1, fixtable_insert/1, rename/1, rename_unnamed/1, evil_rename/1, + fixtable_next/1, fixtable_iter_bag/1, + fixtable_insert/1, rename/1, rename_unnamed/1, evil_rename/1, update_element/1, update_counter/1, evil_update_counter/1, partly_bound/1, match_heavy/1]). -export([update_counter_with_default/1]). -export([update_counter_table_growth/1]). @@ -127,7 +128,7 @@ all() -> {group, match}, t_match_spec_run, {group, lookup_element}, {group, misc}, {group, files}, {group, heavy}, ordered, ordered_match, - interface_equality, fixtable_next, fixtable_insert, + interface_equality, fixtable_next, fixtable_iter_bag, fixtable_insert, rename, rename_unnamed, evil_rename, update_element, update_counter, evil_update_counter, update_counter_with_default, partly_bound, @@ -2446,6 +2447,135 @@ do_fixtable_next(Tab) -> false = ets:info(Tab, fixed), ets:delete(Tab). +%% Check that iteration of bags find all live objects and nothing else. +fixtable_iter_bag(Config) when is_list(Config) -> + repeat_for_opts(fun fixtable_iter_do/1, + [write_concurrency,[bag,duplicate_bag]]). + +fixtable_iter_do(Opts) -> + EtsMem = etsmem(), + do_fixtable_iter_bag(ets_new(fixtable_iter_bag,Opts)), + verify_etsmem(EtsMem). + +do_fixtable_iter_bag(T) -> + MaxValues = 4, + %% Create 1 to MaxValues objects for each key + %% and then delete every possible combination of those objects + %% in every possible order. + %% Then test iteration returns all live objects and nothing else. + + CrDelOps = [begin + Values = lists:seq(1,N), + %% All ways of deleting any number of the Values in any order + Combos = combs(Values), + DeleteOps = concat_lists([perms(C) || C <- Combos]), + {N, DeleteOps} + end + || N <- lists:seq(1,MaxValues)], + + %%io:format("~p\n", [CrDelOps]), + + NKeys = lists:foldl(fun({_, DeleteOps}, Cnt) -> + Cnt + length(DeleteOps) + end, + 0, + CrDelOps), + + io:format("Create ~p keys\n", [NKeys]), + + %% Fixate even before inserts just to maintain small table size + %% and increase likelyhood of different keys in same bucket. + ets:safe_fixtable(T,true), + InsRes = [begin + [begin + Key = {NValues,ValueList}, + [begin + Tpl = {Key, V}, + %%io:format("Insert object ~p", [Tpl]), + ets:insert(T, Tpl), + Tpl + end + || V <- lists:seq(1,NValues)] + end + || ValueList <- DeleteOps] + end + || {NValues, DeleteOps} <- CrDelOps], + + Inserted = lists:flatten(InsRes), + InSorted = lists:sort(Inserted), + InSorted = lists:usort(Inserted), %% No duplicates + NObjs = length(Inserted), + + DelRes = [begin + [begin + Key = {NValues,ValueList}, + [begin + Tpl = {Key, V}, + %%io:format("Delete object ~p", [Tpl]), + ets:delete_object(T, Tpl), + Tpl + end + || V <- ValueList] + end + || ValueList <- DeleteOps] + end + || {NValues, DeleteOps} <- CrDelOps], + + Deleted = lists:flatten(DelRes), + DelSorted = lists:sort(Deleted), + DelSorted = lists:usort(Deleted), %% No duplicates + NDels = length(Deleted), + + %% Nr of keys where all values were deleted. 
+ NDeletedKeys = lists:sum([factorial(N) || N <- lists:seq(1,MaxValues)]), + + CountKeysFun = fun Me(K1, Cnt) -> + case ets:next(T, K1) of + '$end_of_table' -> + Cnt; + K2 -> + Objs = ets:lookup(T, K2), + [{{NValues, ValueList}, _V} | _] = Objs, + ExpectedLive = NValues - length(ValueList), + ExpectedLive = length(Objs), + Me(K2, Cnt+1) + end + end, + + ExpectedKeys = NKeys - NDeletedKeys, + io:format("Expected keys: ~p\n", [ExpectedKeys]), + FoundKeys = CountKeysFun(ets:first(T), 1), + io:format("Found keys: ~p\n", [FoundKeys]), + ExpectedKeys = FoundKeys, + + ExpectedObjs = NObjs - NDels, + io:format("Expected objects: ~p\n", [ExpectedObjs]), + FoundObjs = ets:select_count(T, [{{'_','_'}, [], [true]}]), + io:format("Found objects: ~p\n", [FoundObjs]), + ExpectedObjs = FoundObjs, + + ets:delete(T). + +%% All permutations of list +perms([]) -> [[]]; +perms(L) -> [[H|T] || H <- L, T <- perms(L--[H])]. + +%% All combinations of picking the element (or not) from list +combs([]) -> [[]]; +combs([H|T]) -> + Tcombs = combs(T), + Tcombs ++ [[H | C] || C <- Tcombs]. + +factorial(0) -> 1; +factorial(N) when N > 0 -> + N * factorial(N - 1). + +concat_lists([]) -> + []; +concat_lists([H|T]) -> + H ++ concat_lists(T). + + %% Check inserts of deleted keys in fixed bags. fixtable_insert(Config) when is_list(Config) -> Combos = [[Type,{write_concurrency,WC}] || Type<- [bag,duplicate_bag], |
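The fixtable_iter_bag test above creates, for every key with N values, one key per possible deletion sequence: combs/1 enumerates every subset of the values and perms/1 every order in which that subset can be deleted, and the keys whose whole value list gets deleted are exactly the N! full permutations, which is where the factorial/1 sum for NDeletedKeys comes from. The small C program below just reproduces that counting for MaxValues = 4; it is an illustration of the test's bookkeeping, not part of the suite.

#include <stdio.h>

#define MAX_VALUES 4

static long factorial(int n)
{
    return n <= 1 ? 1 : n * factorial(n - 1);
}

static long binomial(int n, int k)
{
    return factorial(n) / (factorial(k) * factorial(n - k));
}

int main(void)
{
    long nkeys = 0, ndeleted_keys = 0;
    int n, k;

    for (n = 1; n <= MAX_VALUES; n++) {
        long ops = 0;
        for (k = 0; k <= n; k++)
            ops += binomial(n, k) * factorial(k); /* subsets x delete orders  */
        nkeys += ops;                  /* one key per deletion sequence       */
        ndeleted_keys += factorial(n); /* sequences that delete all N values  */
    }

    printf("keys created: %ld, fully emptied: %ld, expected live keys: %ld\n",
           nkeys, ndeleted_keys, nkeys - ndeleted_keys);
    return 0;
}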