27 files changed, 2114 insertions, 1003 deletions
diff --git a/lib/compiler/doc/src/notes.xml b/lib/compiler/doc/src/notes.xml
index f0d869381b..f11444137d 100644
--- a/lib/compiler/doc/src/notes.xml
+++ b/lib/compiler/doc/src/notes.xml
@@ -32,6 +32,50 @@
   <p>This document describes the changes made to the Compiler
     application.</p>
 
+<section><title>Compiler 7.4.4</title>
+
+    <section><title>Fixed Bugs and Malfunctions</title>
+      <list>
+        <item>
+	    <p>Fixed a compiler crash introduced in <c>22.0.6</c>
+	    (OTP-15952).</p>
+          <p>
+	    Own Id: OTP-15953 Aux Id: ERL-999 </p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+<section><title>Compiler 7.4.3</title>
+
+    <section><title>Fixed Bugs and Malfunctions</title>
+      <list>
+        <item>
+	    <p>Fixed an unsafe optimization when matching
+	    <c>tuple_size/1</c> outside of guards, which could crash
+	    the emulator if the argument was not a tuple.</p>
+          <p>
+	    Own Id: OTP-15945</p>
+        </item>
+        <item>
+	    <p>Fixed a rare bug that could cause the wrong kind of
+	    exception to be thrown when a BIF failed in a function
+	    that matched bitstrings.</p>
+          <p>
+	    Own Id: OTP-15946</p>
+        </item>
+        <item>
+	    <p>Fixed a bug where receive statements inside try/catch
+	    blocks could return incorrect results.</p>
+          <p>
+	    Own Id: OTP-15952</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
 <section><title>Compiler 7.4.2</title>
 
     <section><title>Fixed Bugs and Malfunctions</title>
diff --git a/lib/compiler/src/beam_call_types.erl b/lib/compiler/src/beam_call_types.erl
index d091b7866d..e76ad79365 100644
--- a/lib/compiler/src/beam_call_types.erl
+++ b/lib/compiler/src/beam_call_types.erl
@@ -24,39 +24,7 @@
 
 -import(lists, [duplicate/2,foldl/3]).
 
--export([never_throws/3, types/3]).
-
--spec never_throws(Mod, Func, Arity) -> boolean() when
-      Mod :: atom(),
-      Func :: atom(),
-      Arity :: non_neg_integer().
-
-never_throws(erlang, '/=', 2) -> true;
-never_throws(erlang, '<', 2) -> true;
-never_throws(erlang, '=/=', 2) -> true;
-never_throws(erlang, '=:=', 2) -> true;
-never_throws(erlang, '=<', 2) -> true;
-never_throws(erlang, '==', 2) -> true;
-never_throws(erlang, '>', 2) -> true;
-never_throws(erlang, '>=', 2) -> true;
-never_throws(erlang, is_atom, 1) -> true;
-never_throws(erlang, is_boolean, 1) -> true;
-never_throws(erlang, is_binary, 1) -> true;
-never_throws(erlang, is_bitstring, 1) -> true;
-never_throws(erlang, is_float, 1) -> true;
-never_throws(erlang, is_function, 1) -> true;
-never_throws(erlang, is_integer, 1) -> true;
-never_throws(erlang, is_list, 1) -> true;
-never_throws(erlang, is_map, 1) -> true;
-never_throws(erlang, is_number, 1) -> true;
-never_throws(erlang, is_pid, 1) -> true;
-never_throws(erlang, is_port, 1) -> true;
-never_throws(erlang, is_reference, 1) -> true;
-never_throws(erlang, is_tuple, 1) -> true;
-never_throws(erlang, get, 1) -> true;
-never_throws(erlang, self, 0) -> true;
-never_throws(erlang, node, 0) -> true;
-never_throws(_, _, _) -> false.
+-export([types/3]).
 
 %%
 %% Returns the inferred return and argument types for known functions, and
@@ -69,7 +37,7 @@ never_throws(_, _, _) -> false.
 -spec types(Mod, Func, ArgTypes) -> {RetType, ArgTypes, CanSubtract} when
       Mod :: atom(),
       Func :: atom(),
-      ArgTypes :: [type()],
+      ArgTypes :: [normal_type()],
       RetType :: type(),
       CanSubtract :: boolean().
 
@@ -198,7 +166,7 @@ types(erlang, element, [PosType, TupleType]) ->
 types(erlang, setelement, [PosType, TupleType, ArgType]) ->
     RetType = case {PosType,TupleType} of
                   {#t_integer{elements={Index,Index}},
-                   #t_tuple{elements=Es0,size=Size}=T} ->
+                   #t_tuple{elements=Es0,size=Size}=T} when Index >= 1 ->
                       %% This is an exact index, update the type of said
                       %% element or return 'none' if it's known to be out of
                       %% bounds.
@@ -212,7 +180,7 @@ types(erlang, setelement, [PosType, TupleType, ArgType]) ->
                               none
                       end;
                   {#t_integer{elements={Min,Max}},
-                   #t_tuple{elements=Es0,size=Size}=T} ->
+                   #t_tuple{elements=Es0,size=Size}=T} when Min >= 1 ->
                       %% We know this will land between Min and Max, so kill
                       %% the types for those indexes.
                       Es = discard_tuple_element_info(Min, Max, Es0),
@@ -385,15 +353,27 @@ types(lists, zipwith3, [_,A,B,C]) ->
     sub_unsafe(ZipType, [#t_fun{arity=3}, ZipType, ZipType, ZipType]);
 
 %% Functions with complex return values.
-types(lists, partition, [_,_]) ->
-    sub_unsafe(make_two_tuple(list, list), [#t_fun{arity=1}, list]);
+types(lists, keyfind, [KeyType,PosType,_]) ->
+    TupleType = case PosType of
+                    #t_integer{elements={Index,Index}} when is_integer(Index),
+                                                            Index >= 1 ->
+                        Es = beam_types:set_element_type(Index, KeyType, #{}),
+                        #t_tuple{size=Index,elements=Es};
+                    _ ->
+                        #t_tuple{}
+                end,
+    RetType = beam_types:join(TupleType, beam_types:make_atom(false)),
+    sub_unsafe(RetType, [any, #t_integer{}, list]);
 types(lists, MapFold, [_Fun, _Init, List])
   when MapFold =:= mapfoldl; MapFold =:= mapfoldr ->
-    ListType = same_length_type(List),
-    RetType = #t_tuple{size=2,
-                       exact=true,
-                       elements=#{ 1 => ListType }},
+    RetType = make_two_tuple(same_length_type(List), any),
     sub_unsafe(RetType, [#t_fun{arity=2}, any, list]);
+types(lists, partition, [_,_]) ->
+    sub_unsafe(make_two_tuple(list, list), [#t_fun{arity=1}, list]);
+types(lists, search, [_,_]) ->
+    TupleType = make_two_tuple(beam_types:make_atom(value), any),
+    RetType = beam_types:join(TupleType, beam_types:make_atom(false)),
+    sub_unsafe(RetType, [#t_fun{arity=1}, list]);
 types(lists, splitwith, [_,_]) ->
     sub_unsafe(make_two_tuple(list, list), [#t_fun{arity=1}, list]);
 types(lists, unzip, [List]) ->
@@ -505,5 +485,6 @@ lists_zip_type(Types) ->
           end, list, Types).
 
 make_two_tuple(Type1, Type2) ->
-    #t_tuple{size=2,exact=true,
-             elements=#{1=>Type1,2=>Type2}}.
+    Es0 = beam_types:set_element_type(1, Type1, #{}),
+    Es = beam_types:set_element_type(2, Type2, Es0),
+    #t_tuple{size=2,exact=true,elements=Es}.
diff --git a/lib/compiler/src/beam_kernel_to_ssa.erl b/lib/compiler/src/beam_kernel_to_ssa.erl
index df95749fb3..474cb1a851 100644
--- a/lib/compiler/src/beam_kernel_to_ssa.erl
+++ b/lib/compiler/src/beam_kernel_to_ssa.erl
@@ -34,7 +34,7 @@
 -type label() :: beam_ssa:label().
 
 %% Main codegen structure.
--record(cg, {lcount=1 :: label(),               %Label counter
+-record(cg, {lcount=1 :: label(),   %Label counter
              bfail=1 :: label(),
              catch_label=none :: 'none' | label(),
              vars=#{} :: map(),     %Defined variables.
@@ -83,6 +83,7 @@ function(#k_fdef{anno=Anno0,func=Name,arity=Arity,
 
 cg_fun(Ke, St0) ->
     {UltimateFail,FailIs,St1} = make_failure(badarg, St0),
+    ?EXCEPTION_BLOCK = UltimateFail,            %Assertion.
     St2 = St1#cg{bfail=UltimateFail,ultimate_failure=UltimateFail},
     {B,St} = cg(Ke, St2),
     Asm = [{label,0}|B++FailIs],
@@ -279,7 +280,7 @@ select_binary(#k_val_clause{val=#k_binary{segs=#k_var{name=Ctx0}},body=B},
               #k_var{}=Src, Tf, Vf, St0) ->
     {Ctx,St1} = new_ssa_var(Ctx0, St0),
     {Bis0,St2} = match_cg(B, Vf, St1),
-    {TestIs,St} = make_cond_branch(succeeded, [Ctx], Tf, St2),
+    {TestIs,St} = make_succeeded(Ctx, {guard, Tf}, St2),
     Bis1 = [#b_set{op=bs_start_match,dst=Ctx,
                    args=[ssa_arg(Src, St)]}] ++ TestIs ++ Bis0,
     Bis = finish_bs_matching(Bis1),
@@ -311,6 +312,35 @@ make_cond_branch(Cond, Args, Fail, St0) ->
 make_uncond_branch(Fail) ->
     #b_br{bool=#b_literal{val=true},succ=Fail,fail=Fail}.
 
+%%
+%% The 'succeeded' instruction needs special treatment in catch blocks to
+%% prevent the checked operation from being optimized away if a later pass
+%% determines that it always fails.
+%%
+
+make_succeeded(Var, {in_catch, CatchLbl}, St0) ->
+    {Bool, St1} = new_ssa_var('@ssa_bool', St0),
+    {Succ, St2} = new_label(St1),
+    {Fail, St} = new_label(St2),
+
+    Check = [#b_set{op=succeeded,dst=Bool,args=[Var]},
+             #b_br{bool=Bool,succ=Succ,fail=Fail}],
+
+    %% Add a dummy block that references the checked variable, ensuring it
+    %% stays alive and that it won't be merged with the landing pad.
+    Trampoline = [{label,Fail},
+                  #b_set{op=exception_trampoline,args=[Var]},
+                  make_uncond_branch(CatchLbl)],
+
+    {Check ++ Trampoline ++ [{label,Succ}], St};
+make_succeeded(Var, {no_catch, Fail}, St) ->
+    %% Ultimate failure raises an exception, so we must treat it as if it were
+    %% in a catch to keep it from being optimized out.
+    #cg{ultimate_failure=Fail} = St,            %Assertion
+    make_succeeded(Var, {in_catch, Fail}, St);
+make_succeeded(Var, {guard, Fail}, St) ->
+    make_cond_branch(succeeded, [Var], Fail, St).
+
 %% Instructions for selection of binary segments.
 
 select_bin_segs(Scs, Ivar, Tf, St) ->
@@ -394,7 +424,7 @@ select_extract_int(#k_var{name=Tl}, Val, #k_int{val=Sz}, U, Fs, Vf,
                   <<Val:Bits/little>>
           end,
     Bits = bit_size(Bin),			%Assertion.
-    {TestIs,St} = make_cond_branch(succeeded, [Dst], Vf, St1),
+    {TestIs,St} = make_succeeded(Dst, {guard, Vf}, St1),
     Set = #b_set{op=bs_match,dst=Dst,
                  args=[#b_literal{val=string},Ctx,#b_literal{val=Bin}]},
     {[Set|TestIs],St}.
@@ -412,7 +442,7 @@ build_bs_instr(Anno, Type, Fail, Ctx, Size, Unit0, Flags0, Dst, St0) ->
                   #b_set{anno=Anno,op=bs_match,dst=Dst,
                          args=[TypeArg,Ctx,Flags]}
           end,
-    {Is,St} = make_cond_branch(succeeded, [Dst], Fail, St0),
+    {Is,St} = make_succeeded(Dst, {guard, Fail}, St0),
     {[Get|Is],St}.
 
 select_val(#k_val_clause{val=#k_tuple{es=Es},body=B}, V, Vf, St0) ->
@@ -475,7 +505,7 @@ select_extract_map([P|Ps], Src, Fail, St0) ->
     Key = ssa_arg(Key0, St0),
     {Dst,St1} = new_ssa_var(Dst0, St0),
     Set = #b_set{op=get_map_element,dst=Dst,args=[MapSrc,Key]},
-    {TestIs,St2} = make_cond_branch(succeeded, [Dst], Fail, St1),
+    {TestIs,St2} = make_succeeded(Dst, {guard, Fail}, St1),
     {Is,St} = select_extract_map(Ps, Src, Fail, St2),
     {[Set|TestIs]++Is,St};
 select_extract_map([], _, _, St) ->
@@ -596,7 +626,7 @@ match_fmf(F, LastFail, St0, [H|T]) ->
     {Rs,St3} = match_fmf(F, LastFail, St2, T),
     {R ++ [{label,Fail}] ++ Rs,St3}.
 
-%% fail_label(State) -> {Where,FailureLabel}.
+%% fail_context(State) -> {Where,FailureLabel}.
 %%       Where = guard | no_catch | in_catch
 %%  Return an indication of which part of a function code is
 %%  being generated for and the appropriate failure label to
@@ -609,7 +639,7 @@ match_fmf(F, LastFail, St0, [H|T]) ->
 %%                 a try/catch or catch.
 %%     in_catch  - In the scope of a try/catch or catch.
 
-fail_label(#cg{catch_label=Catch,bfail=Fail,ultimate_failure=Ult}) ->
+fail_context(#cg{catch_label=Catch,bfail=Fail,ultimate_failure=Ult}) ->
     if
         Fail =/= Ult ->
             {guard,Fail};
@@ -619,14 +649,6 @@ fail_label(#cg{catch_label=Catch,bfail=Fail,ultimate_failure=Ult}) ->
             {in_catch,Catch}
     end.
 
-%% bif_fail_label(State) -> FailureLabel.
-%%  Return the appropriate failure label for a guard BIF call or
-%%  primop that fails.
-
-bif_fail_label(St) ->
-    {_,Fail} = fail_label(St),
-    Fail.
-
 %% call_cg(Func, [Arg], [Ret], Le, State) ->
 %%      {[Ainstr],State}.
 %% enter_cg(Func, [Arg], Le, St) -> {[Ainstr],St}.
@@ -635,7 +657,7 @@ bif_fail_label(St) ->
 call_cg(Func, As, [], Le, St) ->
     call_cg(Func, As, [#k_var{name='@ssa_ignored'}], Le, St);
 call_cg(Func0, As, [#k_var{name=R}|MoreRs]=Rs, Le, St0) ->
-    case fail_label(St0) of
+    case fail_context(St0) of
         {guard,Fail} ->
             %% Inside a guard. The only allowed function call is to
             %% erlang:error/1,2. We will generate a branch to the
@@ -645,7 +667,7 @@ call_cg(Func0, As, [#k_var{name=R}|MoreRs]=Rs, Le, St0) ->
             [#k_var{name=DestVar}] = Rs,
             St = set_ssa_var(DestVar, #b_literal{val=unused}, St0),
             {[make_uncond_branch(Fail),#cg_unreachable{}],St};
-        {Catch,Fail} ->
+        FailCtx ->
             %% Ordinary function call in a function body.
             Args = ssa_args(As, St0),
             {Ret,St1} = new_ssa_var(R, St0),
@@ -657,11 +679,12 @@ call_cg(Func0, As, [#k_var{name=R}|MoreRs]=Rs, Le, St0) ->
             St2 = foldl(fun(#k_var{name=Dummy}, S) ->
                                 set_ssa_var(Dummy, #b_literal{val=unused}, S)
                         end, St1, MoreRs),
-            case Catch of
-                no_catch ->
+
+            case FailCtx of
+                {no_catch, _} ->
                     {[Call],St2};
-                in_catch ->
-                    {TestIs,St} = make_cond_branch(succeeded, [Ret], Fail, St2),
+                {in_catch, _} ->
+                    {TestIs,St} = make_succeeded(Ret, FailCtx, St2),
                     {[Call|TestIs],St}
             end
     end.
@@ -748,8 +771,8 @@ bif_cg(Bif, As0, [#k_var{name=Dst0}], Le, St0) ->
             I = #b_set{anno=line_anno(Le),op={bif,Bif},dst=Dst,args=As},
             case erl_bifs:is_safe(erlang, Bif, length(As)) of
                 false ->
-                    Fail = bif_fail_label(St1),
-                    {Is,St} = make_cond_branch(succeeded, [Dst], Fail, St1),
+                    FailCtx = fail_context(St1),
+                    {Is,St} = make_succeeded(Dst, FailCtx, St1),
                     {[I|Is],St};
                 true->
                     {[I],St1}
@@ -797,7 +820,7 @@ cg_recv_mesg(#k_var{name=R}, Rm, Tl, Le, St0) ->
     {Dst,St1} = new_ssa_var(R, St0),
     {Mis,St2} = match_cg(Rm, none, St1),
     RecvLbl = St1#cg.recv,
-    {TestIs,St} = make_cond_branch(succeeded, [Dst], Tl, St2),
+    {TestIs,St} = make_succeeded(Dst, {guard, Tl}, St2),
     Is = [#b_br{anno=line_anno(Le),bool=#b_literal{val=true},
                 succ=RecvLbl,fail=RecvLbl},
           {label,RecvLbl},
@@ -813,7 +836,7 @@ cg_recv_wait(Te, Es, St0) ->
     {Tis,St1} = cg(Es, St0),
     Args = [ssa_arg(Te, St1)],
     {WaitDst,St2} = new_ssa_var('@ssa_wait', St1),
-    {WaitIs,St} = make_cond_branch(succeeded, [WaitDst], St1#cg.recv, St2),
+    {WaitIs,St} = make_succeeded(WaitDst, {guard, St1#cg.recv}, St2),
     %% Infinite timeout will be optimized later.
     Is = [#b_set{op=wait_timeout,dst=WaitDst,args=Args}] ++ WaitIs ++
         [#b_set{op=timeout}] ++ Tis,
@@ -924,9 +947,9 @@ put_cg([#k_var{name=R}], #k_tuple{es=Es}, _Le, St0) ->
     PutTuple = #b_set{op=put_tuple,dst=Ret,args=Args},
     {[PutTuple],St};
 put_cg([#k_var{name=R}], #k_binary{segs=Segs}, Le, St0) ->
-    Fail = bif_fail_label(St0),
+    FailCtx = fail_context(St0),
     {Dst,St1} = new_ssa_var(R, St0),
-    cg_binary(Dst, Segs, Fail, Le, St1);
+    cg_binary(Dst, Segs, FailCtx, Le, St1);
 put_cg([#k_var{name=R}], #k_map{op=Op,var=Map,
                                 es=[#k_map_pair{key=#k_var{}=K,val=V}]},
        Le, St0) ->
@@ -955,14 +978,14 @@ put_cg([#k_var{name=R}], Con0, _Le, St0) ->
     {[],St}.
 
 put_cg_map(LineAnno, Op, SrcMap, Dst, List, St0) ->
-    Fail = bif_fail_label(St0),
     Args = [#b_literal{val=Op},SrcMap|List],
     PutMap = #b_set{anno=LineAnno,op=put_map,dst=Dst,args=Args},
     if
         Op =:= assoc ->
             {[PutMap],St0};
         true ->
-            {Is,St} = make_cond_branch(succeeded, [Dst], Fail, St0),
+            FailCtx = fail_context(St0),
+            {Is,St} = make_succeeded(Dst, FailCtx, St0),
             {[PutMap|Is],St}
     end.
 
@@ -970,8 +993,8 @@ put_cg_map(LineAnno, Op, SrcMap, Dst, List, St0) ->
 %%% Code generation for constructing binaries.
 %%%
 
-cg_binary(Dst, Segs0, Fail, Le, St0) ->
-    {PutCode0,SzCalc0,St1} = cg_bin_put(Segs0, Fail, St0),
+cg_binary(Dst, Segs0, FailCtx, Le, St0) ->
+    {PutCode0,SzCalc0,St1} = cg_bin_put(Segs0, FailCtx, St0),
     LineAnno = line_anno(Le),
     Anno = Le#k.a,
     case PutCode0 of
@@ -980,8 +1003,8 @@ cg_binary(Dst, Segs0, Fail, Le, St0) ->
          {label,_}|_] ->
             #k_bin_seg{unit=Unit0,next=Segs} = Segs0,
             Unit = #b_literal{val=Unit0},
-            {PutCode,SzCalc1,St2} = cg_bin_put(Segs, Fail, St1),
-            {_,SzVar,SzCode0,St3} = cg_size_calc(1, SzCalc1, Fail, St2),
+            {PutCode,SzCalc1,St2} = cg_bin_put(Segs, FailCtx, St1),
+            {_,SzVar,SzCode0,St3} = cg_size_calc(1, SzCalc1, FailCtx, St2),
             SzCode = cg_bin_anno(SzCode0, LineAnno),
             Args = case member(single_use, Anno) of
                        true ->
@@ -990,14 +1013,14 @@ cg_binary(Dst, Segs0, Fail, Le, St0) ->
                            [#b_literal{val=append},Src,SzVar,Unit]
                    end,
             BsInit = #b_set{anno=LineAnno,op=bs_init,dst=Dst,args=Args},
-            {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St3),
+            {TestIs,St} = make_succeeded(Dst, FailCtx, St3),
             {SzCode ++ [BsInit] ++ TestIs ++ PutCode,St};
         [#b_set{op=bs_put}|_] ->
-            {Unit,SzVar,SzCode0,St2} = cg_size_calc(8, SzCalc0, Fail, St1),
+            {Unit,SzVar,SzCode0,St2} = cg_size_calc(8, SzCalc0, FailCtx, St1),
             SzCode = cg_bin_anno(SzCode0, LineAnno),
             Args = [#b_literal{val=new},SzVar,Unit],
             BsInit = #b_set{anno=LineAnno,op=bs_init,dst=Dst,args=Args},
-            {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St2),
+            {TestIs,St} = make_succeeded(Dst, FailCtx, St2),
             {SzCode ++ [BsInit] ++ TestIs ++ PutCode0,St}
     end.
 
@@ -1005,18 +1028,18 @@ cg_bin_anno([Set|Sets], Anno) ->
     [Set#b_set{anno=Anno}|Sets];
 cg_bin_anno([], _) -> [].
 
-%% cg_size_calc(PreferredUnit, SzCalc, Fail, St0) ->
+%% cg_size_calc(PreferredUnit, SzCalc, FailCtx, St0) ->
 %%         {ActualUnit,SizeVariable,SizeCode,St}.
 %%  Generate size calculation code.
 
-cg_size_calc(Unit, error, _Fail, St) ->
+cg_size_calc(Unit, error, _FailCtx, St) ->
     {#b_literal{val=Unit},#b_literal{val=badarg},[],St};
-cg_size_calc(8, [{1,_}|_]=SzCalc, Fail, St) ->
-    cg_size_calc(1, SzCalc, Fail, St);
-cg_size_calc(8, SzCalc, Fail, St0) ->
-    {Var,Pre,St} = cg_size_calc_1(SzCalc, Fail, St0),
+cg_size_calc(8, [{1,_}|_]=SzCalc, FailCtx, St) ->
+    cg_size_calc(1, SzCalc, FailCtx, St);
+cg_size_calc(8, SzCalc, FailCtx, St0) ->
+    {Var,Pre,St} = cg_size_calc_1(SzCalc, FailCtx, St0),
     {#b_literal{val=8},Var,Pre,St};
-cg_size_calc(1, SzCalc0, Fail, St0) ->
+cg_size_calc(1, SzCalc0, FailCtx, St0) ->
     SzCalc = map(fun({8,#b_literal{val=Size}}) ->
                          {1,#b_literal{val=8*Size}};
                     ({8,{{bif,byte_size},Src}}) ->
@@ -1026,54 +1049,54 @@ cg_size_calc(1, SzCalc0, Fail, St0) ->
                     ({_,_}=Pair) ->
                          Pair
                  end, SzCalc0),
-    {Var,Pre,St} = cg_size_calc_1(SzCalc, Fail, St0),
+    {Var,Pre,St} = cg_size_calc_1(SzCalc, FailCtx, St0),
     {#b_literal{val=1},Var,Pre,St}.
 
-cg_size_calc_1(SzCalc, Fail, St0) ->
-    cg_size_calc_2(SzCalc, #b_literal{val=0}, Fail, St0).
+cg_size_calc_1(SzCalc, FailCtx, St0) ->
+    cg_size_calc_2(SzCalc, #b_literal{val=0}, FailCtx, St0).
 
-cg_size_calc_2([{_,{'*',Unit,{_,_}=Bif}}|T], Sum0, Fail, St0) ->
-    {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0),
-    {BifDst,Pre1,St2} = cg_size_bif(Bif, Fail, St1),
-    {Sum,Pre2,St} = cg_size_add(Sum1, BifDst, Unit, Fail, St2),
+cg_size_calc_2([{_,{'*',Unit,{_,_}=Bif}}|T], Sum0, FailCtx, St0) ->
+    {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, FailCtx, St0),
+    {BifDst,Pre1,St2} = cg_size_bif(Bif, FailCtx, St1),
+    {Sum,Pre2,St} = cg_size_add(Sum1, BifDst, Unit, FailCtx, St2),
     {Sum,Pre0++Pre1++Pre2,St};
-cg_size_calc_2([{_,#b_literal{}=Sz}|T], Sum0, Fail, St0) ->
-    {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0),
-    {Sum,Pre,St} = cg_size_add(Sum1, Sz, #b_literal{val=1}, Fail, St1),
+cg_size_calc_2([{_,#b_literal{}=Sz}|T], Sum0, FailCtx, St0) ->
+    {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, FailCtx, St0),
+    {Sum,Pre,St} = cg_size_add(Sum1, Sz, #b_literal{val=1}, FailCtx, St1),
     {Sum,Pre0++Pre,St};
-cg_size_calc_2([{_,#b_var{}=Sz}|T], Sum0, Fail, St0) ->
-    {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0),
-    {Sum,Pre,St} = cg_size_add(Sum1, Sz, #b_literal{val=1}, Fail, St1),
+cg_size_calc_2([{_,#b_var{}=Sz}|T], Sum0, FailCtx, St0) ->
+    {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, FailCtx, St0),
+    {Sum,Pre,St} = cg_size_add(Sum1, Sz, #b_literal{val=1}, FailCtx, St1),
     {Sum,Pre0++Pre,St};
-cg_size_calc_2([{_,{_,_}=Bif}|T], Sum0, Fail, St0) ->
-    {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0),
-    {BifDst,Pre1,St2} = cg_size_bif(Bif, Fail, St1),
-    {Sum,Pre2,St} = cg_size_add(Sum1, BifDst, #b_literal{val=1}, Fail, St2),
+cg_size_calc_2([{_,{_,_}=Bif}|T], Sum0, FailCtx, St0) ->
+    {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, FailCtx, St0),
+    {BifDst,Pre1,St2} = cg_size_bif(Bif, FailCtx, St1),
+    {Sum,Pre2,St} = cg_size_add(Sum1, BifDst, #b_literal{val=1}, FailCtx, St2),
     {Sum,Pre0++Pre1++Pre2,St};
-cg_size_calc_2([], Sum, _Fail, St) ->
+cg_size_calc_2([], Sum, _FailCtx, St) ->
     {Sum,[],St}.
 
-cg_size_bif(#b_var{}=Var, _Fail, St) ->
+cg_size_bif(#b_var{}=Var, _FailCtx, St) ->
     {Var,[],St};
-cg_size_bif({Name,Src}, Fail, St0) ->
+cg_size_bif({Name,Src}, FailCtx, St0) ->
     {Dst,St1} = new_ssa_var('@ssa_bif', St0),
     Bif = #b_set{op=Name,dst=Dst,args=[Src]},
-    {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St1),
+    {TestIs,St} = make_succeeded(Dst, FailCtx, St1),
     {Dst,[Bif|TestIs],St}.
 
-cg_size_add(#b_literal{val=0}, Val, #b_literal{val=1}, _Fail, St) ->
+cg_size_add(#b_literal{val=0}, Val, #b_literal{val=1}, _FailCtx, St) ->
     {Val,[],St};
-cg_size_add(A, B, Unit, Fail, St0) ->
+cg_size_add(A, B, Unit, FailCtx, St0) ->
     {Dst,St1} = new_ssa_var('@ssa_sum', St0),
-    {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St1),
+    {TestIs,St} = make_succeeded(Dst, FailCtx, St1),
     BsAdd = #b_set{op=bs_add,dst=Dst,args=[A,B,Unit]},
     {Dst,[BsAdd|TestIs],St}.
 
-cg_bin_put(Seg, Fail, St) ->
-    cg_bin_put_1(Seg, Fail, [], [], St).
+cg_bin_put(Seg, FailCtx, St) ->
+    cg_bin_put_1(Seg, FailCtx, [], [], St).
 
 cg_bin_put_1(#k_bin_seg{size=Size0,unit=U,type=T,flags=Fs,seg=Src0,next=Next},
-           Fail, Acc, SzCalcAcc, St0) ->
+             FailCtx, Acc, SzCalcAcc, St0) ->
     [Src,Size] = ssa_args([Src0,Size0], St0),
     NeedSize = bs_need_size(T),
     TypeArg = #b_literal{val=T},
@@ -1083,9 +1106,12 @@ cg_bin_put_1(#k_bin_seg{size=Size0,unit=U,type=T,flags=Fs,seg=Src0,next=Next},
                true -> [TypeArg,Flags,Src,Size,Unit];
                false -> [TypeArg,Flags,Src]
            end,
-    {Is,St} = make_cond_branch(bs_put, Args, Fail, St0),
+    %% bs_put has its own 'succeeded' logic, and should always jump directly to
+    %% the fail label regardless of whether it's in a catch or not.
+    {_, FailLbl} = FailCtx,
+    {Is,St} = make_cond_branch(bs_put, Args, FailLbl, St0),
     SzCalc = bin_size_calc(T, Src, Size, U),
-    cg_bin_put_1(Next, Fail, reverse(Is, Acc), [SzCalc|SzCalcAcc], St);
+    cg_bin_put_1(Next, FailCtx, reverse(Is, Acc), [SzCalc|SzCalcAcc], St);
 cg_bin_put_1(#k_bin_end{}, _, Acc, SzCalcAcc, St) ->
     SzCalc = fold_size_calc(SzCalcAcc, 0, []),
     {reverse(Acc),SzCalc,St}.
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index 831e6489a9..77619368c7 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -21,7 +21,7 @@
 
 -module(beam_ssa).
 -export([add_anno/3,get_anno/2,get_anno/3,
-         clobbers_xregs/1,def/2,def_used/2,
+         clobbers_xregs/1,def/2,def_unused/3,
          definitions/1,
          dominators/1,common_dominators/3,
          flatmapfold_instrs_rpo/4,
@@ -96,11 +96,12 @@
 %% To avoid the collapsing, change the value of SET_LIMIT to 50 in the
 %% file erl_types.erl in the hipe application.
 
--type prim_op() :: 'bs_add' | 'bs_extract' | 'bs_init' | 'bs_init_writable' |
+-type prim_op() :: 'bs_add' | 'bs_extract' | 'bs_get_tail' |
+                   'bs_init' | 'bs_init_writable' |
                    'bs_match' | 'bs_put' | 'bs_start_match' | 'bs_test_tail' |
                    'bs_utf16_size' | 'bs_utf8_size' | 'build_stacktrace' |
                    'call' | 'catch_end' |
-                   'extract' |
+                   'extract' | 'exception_trampoline' |
                    'get_hd' | 'get_map_element' | 'get_tl' | 'get_tuple_element' |
                    'has_map_field' |
                    'is_nonempty_list' | 'is_tagged_tuple' |
@@ -117,8 +118,10 @@
                     '+' | '-' | '*' | '/'.
 
 %% Primops only used internally during code generation.
--type cg_prim_op() :: 'bs_get' | 'bs_match_string' | 'bs_restore' | 'bs_skip' |
-                      'copy' | 'match_fail' | 'put_tuple_arity' | 'put_tuple_element' |
+-type cg_prim_op() :: 'bs_get' | 'bs_get_position' | 'bs_match_string' |
+                      'bs_restore' | 'bs_save' | 'bs_set_position' | 'bs_skip' |
+                      'copy' | 'match_fail' | 'put_tuple_arity' |
+                      'put_tuple_element' | 'put_tuple_elements' |
                       'set_tuple_element'.
 
 -import(lists, [foldl/3,keyfind/3,mapfoldl/3,member/2,reverse/1]).
@@ -317,17 +320,18 @@ def(Ls, Blocks) ->
     Blks = [map_get(L, Blocks) || L <- Top],
     def_1(Blks, []).
 
--spec def_used(Ls, Blocks) -> {Def,Used} when
+-spec def_unused(Ls, Used, Blocks) -> {Def,Unused} when
       Ls :: [label()],
+      Used :: ordsets:ordset(var_name()),
       Blocks :: block_map(),
       Def :: ordsets:ordset(var_name()),
-      Used :: ordsets:ordset(var_name()).
+      Unused :: ordsets:ordset(var_name()).
 
-def_used(Ls, Blocks) ->
+def_unused(Ls, Unused, Blocks) ->
     Top = rpo(Ls, Blocks),
     Blks = [map_get(L, Blocks) || L <- Top],
     Preds = cerl_sets:from_list(Top),
-    def_used_1(Blks, Preds, [], []).
+    def_unused_1(Blks, Preds, [], Unused).
 
 %% dominators(BlockMap) -> {Dominators,Numbering}.
 %%  Calculate the dominator tree, returning a map where each entry
@@ -649,28 +653,28 @@ is_commutative('=/=') -> true;
 is_commutative('/=') -> true;
 is_commutative(_) -> false.
 
-def_used_1([#b_blk{is=Is,last=Last}|Bs], Preds, Def0, Used0) ->
-    {Def,Used1} = def_used_is(Is, Preds, Def0, Used0),
-    Used = ordsets:union(used(Last), Used1),
-    def_used_1(Bs, Preds, Def, Used);
-def_used_1([], _Preds, Def, Used) ->
-    {ordsets:from_list(Def),Used}.
+def_unused_1([#b_blk{is=Is,last=Last}|Bs], Preds, Def0, Unused0) ->
+    Unused1 = ordsets:subtract(Unused0, used(Last)),
+    {Def,Unused} = def_unused_is(Is, Preds, Def0, Unused1),
+    def_unused_1(Bs, Preds, Def, Unused);
+def_unused_1([], _Preds, Def, Unused) ->
+    {ordsets:from_list(Def), Unused}.
 
-def_used_is([#b_set{op=phi,dst=Dst,args=Args}|Is],
-            Preds, Def0, Used0) ->
+def_unused_is([#b_set{op=phi,dst=Dst,args=Args}|Is],
+            Preds, Def0, Unused0) ->
     Def = [Dst|Def0],
     %% We must be careful to only include variables that will
     %% be used when arriving from one of the predecessor blocks
     %% in Preds.
-    Used1 = [V || {#b_var{}=V,L} <- Args, cerl_sets:is_element(L, Preds)],
-    Used = ordsets:union(ordsets:from_list(Used1), Used0),
-    def_used_is(Is, Preds, Def, Used);
-def_used_is([#b_set{dst=Dst}=I|Is], Preds, Def0, Used0) ->
+    Unused1 = [V || {#b_var{}=V,L} <- Args, cerl_sets:is_element(L, Preds)],
+    Unused = ordsets:subtract(Unused0, ordsets:from_list(Unused1)),
+    def_unused_is(Is, Preds, Def, Unused);
+def_unused_is([#b_set{dst=Dst}=I|Is], Preds, Def0, Unused0) ->
     Def = [Dst|Def0],
-    Used = ordsets:union(used(I), Used0),
-    def_used_is(Is, Preds, Def, Used);
-def_used_is([], _Preds, Def, Used) ->
-    {Def,Used}.
+    Unused = ordsets:subtract(Unused0, used(I)),
+    def_unused_is(Is, Preds, Def, Unused);
+def_unused_is([], _Preds, Def, Unused) ->
+    {Def,Unused}.
 
 def_1([#b_blk{is=Is}|Bs], Def0) ->
     Def = def_is(Is, Def0),
diff --git a/lib/compiler/src/beam_ssa.hrl b/lib/compiler/src/beam_ssa.hrl
index fa76b08453..509a94135e 100644
--- a/lib/compiler/src/beam_ssa.hrl
+++ b/lib/compiler/src/beam_ssa.hrl
@@ -62,5 +62,13 @@
 -record(b_local, {name  :: beam_ssa:b_literal(),
                   arity :: non_neg_integer()}).
 
-%% If this block exists, it calls erlang:error(badarg).
--define(BADARG_BLOCK, 1).
+%% This is a psuedo-block used to express that certain instructions and BIFs
+%% throw exceptions on failure. The code generator rewrites all branches to
+%% this block to {f,0} which causes the instruction to throw an exception
+%% instead of branching.
+%%
+%% Since this is not an ordinary block, it's illegal to merge it with other
+%% blocks, and jumps are only valid when we know that an exception will be
+%% thrown by the operation that branches here; the *block itself* does not
+%% throw an exception.
+-define(EXCEPTION_BLOCK, 1).
diff --git a/lib/compiler/src/beam_ssa_bsm.erl b/lib/compiler/src/beam_ssa_bsm.erl
index abbda2ebe4..7a8dc127d7 100644
--- a/lib/compiler/src/beam_ssa_bsm.erl
+++ b/lib/compiler/src/beam_ssa_bsm.erl
@@ -684,8 +684,12 @@ aca_copy_successors(Lbl0, Blocks0, Counter0) ->
     Lbl = maps:get(Lbl0, BRs),
     {Lbl, Blocks, Counter}.
 
+aca_cs_build_brs([?EXCEPTION_BLOCK=Lbl | Path], Counter, Acc) ->
+    %% ?EXCEPTION_BLOCK is a marker and not an actual block, so renaming it
+    %% will break exception handling.
+    aca_cs_build_brs(Path, Counter, Acc#{ Lbl => Lbl });
 aca_cs_build_brs([Lbl | Path], Counter0, Acc) ->
-    aca_cs_build_brs(Path, Counter0 + 1, maps:put(Lbl, Counter0, Acc));
+    aca_cs_build_brs(Path, Counter0 + 1, Acc#{ Lbl => Counter0 });
 aca_cs_build_brs([], Counter, Acc) ->
     {Acc, Counter}.
 
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl
index 7248aca5f3..ff880c6296 100644
--- a/lib/compiler/src/beam_ssa_codegen.erl
+++ b/lib/compiler/src/beam_ssa_codegen.erl
@@ -115,14 +115,14 @@ functions(Forms, AtomMod) ->
 function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) ->
     #{func_info:={_,Name,Arity}} = Anno,
     try
-        assert_badarg_block(Blocks),            %Assertion.
+        assert_exception_block(Blocks),            %Assertion.
         Regs = maps:get(registers, Anno),
         St1 = St0#cg{labels=#{},used_labels=gb_sets:empty(),
                      regs=Regs},
         {Fi,St2} = new_label(St1),              %FuncInfo label
         {Entry,St3} = local_func_label(Name, Arity, St2),
         {Ult,St4} = new_label(St3),             %Ultimate failure
-        Labels = (St4#cg.labels)#{0=>Entry,?BADARG_BLOCK=>0},
+        Labels = (St4#cg.labels)#{0=>Entry,?EXCEPTION_BLOCK=>0},
         St5 = St4#cg{labels=Labels,used_labels=gb_sets:singleton(Entry),
                      ultimate_fail=Ult},
         {Body,St} = cg_fun(Blocks, St5#cg{fc_label=Fi}),
@@ -138,10 +138,10 @@ function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) ->
             erlang:raise(Class, Error, Stack)
     end.
 
-assert_badarg_block(Blocks) ->
-    %% Assertion: ?BADARG_BLOCK must be the call erlang:error(badarg).
+assert_exception_block(Blocks) ->
+    %% Assertion: ?EXCEPTION_BLOCK must be a call erlang:error(badarg).
     case Blocks of
-        #{?BADARG_BLOCK:=Blk} ->
+        #{?EXCEPTION_BLOCK:=Blk} ->
             #b_blk{is=[#b_set{op=call,dst=Ret,
                               args=[#b_remote{mod=#b_literal{val=erlang},
                                               name=#b_literal{val=error}},
@@ -149,7 +149,7 @@ assert_badarg_block(Blocks) ->
                    last=#b_ret{arg=Ret}} = Blk,
             ok;
         #{} ->
-            %% ?BADARG_BLOCK has been removed because it was never used.
+            %% ?EXCEPTION_BLOCK has been removed because it was never used.
             ok
     end.
 
@@ -631,7 +631,7 @@ liveness_get(S, LiveMap) ->
     end.
 
 liveness_successors(Terminator) ->
-    successors(Terminator) -- [?BADARG_BLOCK].
+    successors(Terminator) -- [?EXCEPTION_BLOCK].
 
 liveness_is([#cg_alloc{}=I0|Is], Regs, Live, Acc) ->
     I = I0#cg_alloc{live=num_live(Live, Regs)},
@@ -766,9 +766,8 @@ defined(Linear, #cg{regs=Regs}) ->
 
 def([{L,#cg_blk{is=Is0,last=Last}=Blk0}|Bs], DefMap0, Regs) ->
     Def0 = def_get(L, DefMap0),
-    {Is,Def} = def_is(Is0, Regs, Def0, []),
-    Successors = successors(Last),
-    DefMap = def_successors(Successors, Def, DefMap0),
+    {Is,Def,MaybeDef} = def_is(Is0, Regs, Def0, []),
+    DefMap = def_successors(Last, Def, MaybeDef, DefMap0),
     Blk = Blk0#cg_blk{is=Is},
     [{L,Blk}|def(Bs, DefMap, Regs)];
 def([], _, _) -> [].
@@ -782,6 +781,11 @@ def_get(L, DefMap) ->
 def_is([#cg_alloc{anno=Anno0}=I0|Is], Regs, Def, Acc) ->
     I = I0#cg_alloc{anno=Anno0#{def_yregs=>Def}},
     def_is(Is, Regs, Def, [I|Acc]);
+def_is([#cg_set{op=succeeded,args=[Var]}=I], Regs, Def, Acc) ->
+    %% Var will only be defined on the success branch of the `br`
+    %% for this block.
+    MaybeDef = def_add_yreg(Var, [], Regs),
+    {reverse(Acc, [I]),Def,MaybeDef};
 def_is([#cg_set{op=kill_try_tag,args=[#b_var{}=Tag]}=I|Is], Regs, Def0, Acc) ->
     Def = ordsets:del_element(Tag, Def0),
     def_is(Is, Regs, Def, [I|Acc]);
@@ -824,7 +828,7 @@ def_is([#cg_set{anno=Anno0,dst=Dst}=I0|Is], Regs, Def0, Acc) ->
     Def = def_add_yreg(Dst, Def0, Regs),
     def_is(Is, Regs, Def, [I|Acc]);
 def_is([], _, Def, Acc) ->
-    {reverse(Acc),Def}.
+    {reverse(Acc),Def,[]}.
 
 def_add_yreg(Dst, Def, Regs) ->
     case is_yreg(Dst, Regs) of
@@ -832,6 +836,12 @@ def_add_yreg(Dst, Def, Regs) ->
         false -> Def
     end.
 
+def_successors(#cg_br{bool=#b_var{},succ=Succ,fail=Fail}, Def, MaybeDef, DefMap0) ->
+    DefMap = def_successors([Fail], ordsets:subtract(Def, MaybeDef), DefMap0),
+    def_successors([Succ], Def, DefMap);
+def_successors(Last, Def, [], DefMap) ->
+    def_successors(successors(Last), Def, DefMap).
+
 def_successors([S|Ss], Def0, DefMap) ->
     case DefMap of
         #{S:=Def1} ->
@@ -965,6 +975,12 @@ cg_block(Is0, Last, Next, St0) ->
     case Last of
         #cg_br{succ=Next,fail=Next} ->
             cg_block(Is0, none, St0);
+        #cg_br{succ=Same,fail=Same} when Same =:= ?EXCEPTION_BLOCK ->
+            %% An expression in this block *always* throws an exception, so we
+            %% terminate it with an 'if_end' to make sure the validator knows
+            %% that the following instructions won't actually be reached.
+            {Is,St} = cg_block(Is0, none, St0),
+            {Is++[if_end],St};
         #cg_br{succ=Same,fail=Same} ->
             {Fail,St1} = use_block_label(Same, St0),
             {Is,St} = cg_block(Is0, none, St1),
@@ -1833,7 +1849,7 @@ linearize(Blocks) ->
     Linear = beam_ssa:linearize(Blocks),
     linearize_1(Linear, Blocks).
 
-linearize_1([{?BADARG_BLOCK,_}|Ls], Blocks) ->
+linearize_1([{?EXCEPTION_BLOCK,_}|Ls], Blocks) ->
     linearize_1(Ls, Blocks);
 linearize_1([{L,Block0}|Ls], Blocks) ->
     Block = translate_block(L, Block0, Blocks),
diff --git a/lib/compiler/src/beam_ssa_dead.erl b/lib/compiler/src/beam_ssa_dead.erl
index 88767456a3..55ded77d43 100644
--- a/lib/compiler/src/beam_ssa_dead.erl
+++ b/lib/compiler/src/beam_ssa_dead.erl
@@ -30,7 +30,7 @@
 -import(lists, [append/1,keymember/3,last/1,member/2,
                 takewhile/2,reverse/1]).
 
--type used_vars() :: #{beam_ssa:label():=ordsets:ordset(beam_ssa:var_name())}.
+-type used_vars() :: #{beam_ssa:label():=cerl_sets:set(beam_ssa:var_name())}.
 
 -type basic_type_test() :: atom() | {'is_tagged_tuple',pos_integer(),atom()}.
 -type type_test() :: basic_type_test() | {'not',basic_type_test()}.
@@ -90,13 +90,11 @@ shortcut_opt(#st{bs=Blocks}=St) ->
     %% the diff.)
     %%
     %% Unfortunately, processing the blocks in reverse post order
-    %% potentially makes the time complexity quadratic or even cubic if
-    %% the ordset of unset variables grows large, instead of
-    %% linear for post order processing. We try to still get reasonable
-    %% compilation times by optimizations that will keep the constant
-    %% factor as low as possible, and we try to avoid the cubic time
-    %% complexity by trying to keep the set of unset variables as small
-    %% as possible.
+    %% potentially makes the time complexity quadratic, instead of
+    %% linear for post order processing. We avoid drastic slowdowns by
+    %% limiting how far we search forward to a common block that
+    %% both the success and failure label will reach (see the comment
+    %% in the first clause of shortcut_2/5).
 
     Ls = beam_ssa:rpo(Blocks),
     shortcut_opt(Ls, #{}, St).
@@ -124,10 +122,15 @@ shortcut_terminator(#b_br{bool=#b_var{}=Bool,succ=Succ0,fail=Fail0}=Br,
                     Is, From, Bs, St0) ->
     St = St0#st{target=one_way},
     RelOp = get_rel_op(Bool, Is),
-    SuccBs = bind_var(Bool, #b_literal{val=true}, Bs),
+
+    %% The boolean in a `br` is seldom used by the successors. By
+    %% not binding its value unless it is actually used we might be able
+    %% to skip some work in shortcut/4 and sub/2.
+    SuccBs = bind_var_if_used(Succ0, Bool, #b_literal{val=true}, Bs, St),
     BrSucc = shortcut(Succ0, From, SuccBs, St#st{rel_op=RelOp}),
-    FailBs = bind_var(Bool, #b_literal{val=false}, Bs),
+    FailBs = bind_var_if_used(Fail0, Bool, #b_literal{val=false}, Bs, St),
     BrFail = shortcut(Fail0, From, FailBs, St#st{rel_op=invert_op(RelOp)}),
+
     case {BrSucc,BrFail} of
         {#b_br{bool=#b_literal{val=true},succ=Succ},
          #b_br{bool=#b_literal{val=true},succ=Fail}}
@@ -152,8 +155,14 @@ shortcut_switch([{Lit,L0}|T], Bool, From, Bs, St0) ->
     [{Lit,L}|shortcut_switch(T, Bool, From, Bs, St0)];
 shortcut_switch([], _, _, _, _) -> [].
 
+shortcut(L, _From, Bs, #st{rel_op=none,target=one_way}) when map_size(Bs) =:= 0 ->
+    %% There is no way that we can find a suitable branch, because there is no
+    %% relational operator stored, there are no bindings, and the block L can't
+    %% have any phi nodes from which we could pick bindings because when the target
+    %% is `one_way`, it implies the From block has a two-way `br` terminator.
+    #b_br{bool=#b_literal{val=true},succ=L,fail=L};
 shortcut(L, From, Bs, St) ->
-    shortcut_1(L, From, Bs, ordsets:new(), St).
+    shortcut_1(L, From, Bs, cerl_sets:new(), St).
 
 shortcut_1(L, From, Bs0, UnsetVars0, St) ->
     case shortcut_2(L, From, Bs0, UnsetVars0, St) of
@@ -170,7 +179,19 @@ shortcut_1(L, From, Bs0, UnsetVars0, St) ->
     end.
 
 %% Try to shortcut this block, branching to a successor.
-shortcut_2(L, From, Bs0, UnsetVars0, St) ->
+shortcut_2(L, From, Bs, UnsetVars, St) ->
+    case cerl_sets:size(UnsetVars) of
+        SetSize when SetSize > 128 ->
+            %% This is an heuristic to limit the search for a forced label
+            %% before it drastically slows down the compiler. Experiments
+            %% with scripts/diffable showed that limits larger than 31 did not
+            %% find any more opportunities for optimization.
+            none;
+        _SetSize ->
+            shortcut_3(L, From, Bs, UnsetVars, St)
+    end.
+
+shortcut_3(L, From, Bs0, UnsetVars0, St) ->
     #b_blk{is=Is,last=Last} = get_block(L, St),
     case eval_is(Is, From, Bs0, St) of
         none ->
@@ -347,7 +368,7 @@ update_unset_vars(L, Is, Br, UnsetVars, #st{skippable=Skippable}) ->
             %% Some variables defined in this block are used by
             %% successors. We must update the set of unset variables.
             SetInThisBlock = [V || #b_set{dst=V} <- Is],
-            ordsets:union(UnsetVars, ordsets:from_list(SetInThisBlock))
+            cerl_sets:union(UnsetVars, cerl_sets:from_list(SetInThisBlock))
     end.
 
 shortcut_two_way(#b_br{succ=Succ,fail=Fail}, From, Bs0, UnsetVars0, St0) ->
@@ -376,14 +397,14 @@ is_br_safe(UnsetVars, Br, #st{us=Us}=St) ->
 
             %% A two-way branch never branches to a phi node, so there
             %% is no need to check for phi nodes here.
-             not member(V, UnsetVars) andalso
-                ordsets:is_disjoint(Used0, UnsetVars) andalso
-                ordsets:is_disjoint(Used1, UnsetVars);
+            not cerl_sets:is_element(V, UnsetVars) andalso
+                cerl_sets:is_disjoint(Used0, UnsetVars) andalso
+                cerl_sets:is_disjoint(Used1, UnsetVars);
         #b_br{succ=Same,fail=Same} ->
             %% An unconditional branch must not jump to
             %% a phi node.
             not is_forbidden(Same, St) andalso
-                ordsets:is_disjoint(map_get(Same, Us), UnsetVars)
+                cerl_sets:is_disjoint(map_get(Same, Us), UnsetVars)
     end.
 
 is_forbidden(L, St) ->
@@ -500,6 +521,15 @@ eval_switch_1([], _Arg, _PrevOp, Fail) ->
     %% Fail is now either the failure label or 'none'.
     Fail.
 
+bind_var_if_used(L, Var, Val0, Bs, #st{us=Us}) ->
+    case cerl_sets:is_element(Var, map_get(L, Us)) of
+        true ->
+            Val = get_value(Val0, Bs),
+            Bs#{Var=>Val};
+        false ->
+            Bs
+    end.
+
 bind_var(Var, Val0, Bs) ->
     Val = get_value(Val0, Bs),
     Bs#{Var=>Val}.
@@ -989,7 +1019,7 @@ used_vars([{L,#b_blk{is=Is}=Blk}|Bs], UsedVars0, Skip0) ->
     %% shortcut_opt/1.
 
     Successors = beam_ssa:successors(Blk),
-    Used0 = used_vars_succ(Successors, L, UsedVars0, []),
+    Used0 = used_vars_succ(Successors, L, UsedVars0, cerl_sets:new()),
     Used = used_vars_blk(Blk, Used0),
     UsedVars = used_vars_phis(Is, L, Used, UsedVars0),
 
@@ -1000,8 +1030,8 @@ used_vars([{L,#b_blk{is=Is}=Blk}|Bs], UsedVars0, Skip0) ->
     %% shortcut_opt/1.
 
     Defined0 = [Def || #b_set{dst=Def} <- Is],
-    Defined = ordsets:from_list(Defined0),
-    MaySkip = ordsets:is_disjoint(Defined, Used0),
+    Defined = cerl_sets:from_list(Defined0),
+    MaySkip = cerl_sets:is_disjoint(Defined, Used0),
     case MaySkip of
         true ->
             Skip = Skip0#{L=>true},
@@ -1018,11 +1048,11 @@ used_vars_succ([S|Ss], L, LiveMap, Live0) ->
         #{Key:=Live} ->
             %% The successor has a phi node, and the value for
             %% this block in the phi node is a variable.
-            used_vars_succ(Ss, L, LiveMap, ordsets:union(Live, Live0));
+            used_vars_succ(Ss, L, LiveMap, cerl_sets:union(Live, Live0));
         #{S:=Live} ->
             %% No phi node in the successor, or the value for
             %% this block in the phi node is a literal.
-            used_vars_succ(Ss, L, LiveMap, ordsets:union(Live, Live0));
+            used_vars_succ(Ss, L, LiveMap, cerl_sets:union(Live, Live0));
         #{} ->
             %% A peek_message block which has not been processed yet.
             used_vars_succ(Ss, L, LiveMap, Live0)
@@ -1040,7 +1070,7 @@ used_vars_phis(Is, L, Live0, UsedVars0) ->
             case [{P,V} || {#b_var{}=V,P} <- PhiArgs] of
                 [_|_]=PhiVars ->
                     PhiLive0 = rel2fam(PhiVars),
-                    PhiLive = [{{L,P},ordsets:union(ordsets:from_list(Vs), Live0)} ||
+                    PhiLive = [{{L,P},cerl_sets:union(cerl_sets:from_list(Vs), Live0)} ||
                                   {P,Vs} <- PhiLive0],
                     maps:merge(UsedVars, maps:from_list(PhiLive));
                 [] ->
@@ -1050,14 +1080,14 @@ used_vars_phis(Is, L, Live0, UsedVars0) ->
     end.
 
 used_vars_blk(#b_blk{is=Is,last=Last}, Used0) ->
-    Used = ordsets:union(Used0, beam_ssa:used(Last)),
+    Used = cerl_sets:union(Used0, cerl_sets:from_list(beam_ssa:used(Last))),
     used_vars_is(reverse(Is), Used).
 
 used_vars_is([#b_set{op=phi}|Is], Used) ->
     used_vars_is(Is, Used);
 used_vars_is([#b_set{dst=Dst}=I|Is], Used0) ->
-    Used1 = ordsets:union(Used0, beam_ssa:used(I)),
-    Used = ordsets:del_element(Dst, Used1),
+    Used1 = cerl_sets:union(Used0, cerl_sets:from_list(beam_ssa:used(I))),
+    Used = cerl_sets:del_element(Dst, Used1),
     used_vars_is(Is, Used);
 used_vars_is([], Used) ->
     Used.
@@ -1066,8 +1096,9 @@ used_vars_is([], Used) ->
 %%% Common utilities.
 %%%
 
-sub(#b_set{args=Args}=I, Sub) ->
-    I#b_set{args=[sub_arg(A, Sub) || A <- Args]}.
+sub(#b_set{args=Args}=I, Sub) when map_size(Sub) =/= 0 ->
+    I#b_set{args=[sub_arg(A, Sub) || A <- Args]};
+sub(I, _Sub) -> I.
 
 sub_arg(#b_var{}=Old, Sub) ->
     case Sub of
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index 32b64b393f..580abf4ed9 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -158,6 +158,7 @@ repeated_passes(Opts) ->
           ?PASS(ssa_opt_dead),
           ?PASS(ssa_opt_cse),
           ?PASS(ssa_opt_tail_phis),
+          ?PASS(ssa_opt_sink),
           ?PASS(ssa_opt_tuple_size),
           ?PASS(ssa_opt_record),
           ?PASS(ssa_opt_type_continue)],        %Must run after ssa_opt_dead to
@@ -175,8 +176,8 @@ epilogue_passes(Opts) ->
           ?PASS(ssa_opt_bsm),
           ?PASS(ssa_opt_bsm_units),
           ?PASS(ssa_opt_bsm_shortcut),
-          ?PASS(ssa_opt_blockify),
           ?PASS(ssa_opt_sink),
+          ?PASS(ssa_opt_blockify),
           ?PASS(ssa_opt_merge_blocks),
           ?PASS(ssa_opt_get_tuple_element),
           ?PASS(ssa_opt_trim_unreachable)],
@@ -901,44 +902,52 @@ cse_suitable(#b_set{}) -> false.
 -record(fs,
         {s=undefined :: 'undefined' | 'cleared',
          regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()},
+         vars=cerl_sets:new() :: cerl_sets:set(),
          fail=none :: 'none' | beam_ssa:label(),
          non_guards :: gb_sets:set(beam_ssa:label()),
          bs :: beam_ssa:block_map()
         }).
 
 ssa_opt_float({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
-    NonGuards0 = float_non_guards(Linear0),
-    NonGuards = gb_sets:from_list(NonGuards0),
+    NonGuards = non_guards(Linear0),
     Blocks = maps:from_list(Linear0),
     Fs = #fs{non_guards=NonGuards,bs=Blocks},
     {Linear,Count} = float_opt(Linear0, Count0, Fs),
     {St#st{ssa=Linear,cnt=Count}, FuncDb}.
 
-float_blk_is_in_guard(#b_blk{last=#b_br{fail=F}}, #fs{non_guards=NonGuards}) ->
-    not gb_sets:is_member(F, NonGuards);
-float_blk_is_in_guard(#b_blk{}, #fs{}) ->
+%% The fconv instruction doesn't support jumping to a fail label, so we have to
+%% skip this optimization if the fail block is a guard.
+%%
+%% We also skip the optimization in blocks that always fail, as it's both
+%% difficult and pointless to rewrite them to use float ops.
+float_can_optimize_blk(#b_blk{last=#b_br{bool=#b_var{},fail=F}},
+                       #fs{non_guards=NonGuards}) ->
+    gb_sets:is_member(F, NonGuards);
+float_can_optimize_blk(#b_blk{}, #fs{}) ->
     false.
 
-float_non_guards([{L,#b_blk{is=Is}}|Bs]) ->
-    case Is of
-        [#b_set{op=landingpad}|_] ->
-            [L|float_non_guards(Bs)];
-        _ ->
-            float_non_guards(Bs)
-    end;
-float_non_guards([]) -> [?BADARG_BLOCK].
-
+float_opt([{L,#b_blk{is=[#b_set{op=exception_trampoline,args=[Var]}]}=Blk0} |
+           Bs0], Count0, Fs) ->
+    %% If we've replaced a BIF with float operations, we'll have a lot of extra
+    %% blocks that jump to the same failure block, which may have a trampoline
+    %% that refers to the original operation.
+    %%
+    %% Since the point of the trampoline is to keep the BIF from being removed
+    %% by liveness optimization, we can discard it as the liveness pass leaves
+    %% floats alone.
+    Blk = case cerl_sets:is_element(Var, Fs#fs.vars) of
+              true -> Blk0#b_blk{is=[]};
+              false -> Blk0
+          end,
+    {Bs, Count} = float_opt(Bs0, Count0, Fs),
+    {[{L,Blk}|Bs],Count};
 float_opt([{L,Blk}|Bs0], Count0, Fs) ->
-    case float_blk_is_in_guard(Blk, Fs) of
+    case float_can_optimize_blk(Blk, Fs) of
         true ->
-            %% This block is inside a guard. Don't do
-            %% any floating point optimizations.
-            {Bs,Count} = float_opt(Bs0, Count0, Fs),
-            {[{L,Blk}|Bs],Count};
+            float_opt_1(L, Blk, Bs0, Count0, Fs);
         false ->
-            %% This block is not inside a guard.
-            %% We can do the optimization.
-            float_opt_1(L, Blk, Bs0, Count0, Fs)
+            {Bs,Count} = float_opt(Bs0, Count0, Fs),
+            {[{L,Blk}|Bs],Count}
     end;
 float_opt([], Count, _Fs) ->
     {[],Count}.
@@ -1017,12 +1026,12 @@ float_maybe_flush(Blk0, #fs{s=cleared,fail=Fail,bs=Blocks}=Fs0, Count0) ->
     #b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0,
 
     %% If the success block starts with a floating point operation, we can
-    %% defer flushing to that block as long as it isn't a guard.
+    %% defer flushing to that block as long as it's suitable for optimization.
     #b_blk{is=Is} = SuccBlk = map_get(Succ, Blocks),
-    SuccIsGuard = float_blk_is_in_guard(SuccBlk, Fs0),
+    CanOptimizeSucc = float_can_optimize_blk(SuccBlk, Fs0),
 
     case Is of
-        [#b_set{anno=#{float_op:=_}}|_] when not SuccIsGuard ->
+        [#b_set{anno=#{float_op:=_}}|_] when CanOptimizeSucc ->
             %% No flush needed.
             {[],Blk0,Fs0,Count0};
         _ ->
@@ -1078,21 +1087,22 @@ float_opt_is([], Fs, _Count, _Acc) ->
     none.
 
 float_make_op(#b_set{op={bif,Op},dst=Dst,args=As0}=I0,
-              Ts, #fs{s=S,regs=Rs0}=Fs, Count0) ->
+              Ts, #fs{s=S,regs=Rs0,vars=Vs0}=Fs, Count0) ->
     {As1,Rs1,Count1} = float_load(As0, Ts, Rs0, Count0, []),
     {As,Is0} = unzip(As1),
     {Fr,Count2} = new_reg('@fr', Count1),
     FrDst = #b_var{name=Fr},
     I = I0#b_set{op={float,Op},dst=FrDst,args=As},
+    Vs = cerl_sets:add_element(Dst, Vs0),
     Rs = Rs1#{Dst=>FrDst},
     Is = append(Is0) ++ [I],
     case S of
         undefined ->
             {Ignore,Count} = new_reg('@ssa_ignore', Count2),
             C = #b_set{op={float,clearerror},dst=#b_var{name=Ignore}},
-            {[C|Is],Fs#fs{s=cleared,regs=Rs},Count};
+            {[C|Is],Fs#fs{s=cleared,regs=Rs,vars=Vs},Count};
         cleared ->
-            {Is,Fs#fs{regs=Rs},Count2}
+            {Is,Fs#fs{regs=Rs,vars=Vs},Count2}
     end.
 
 float_load([A|As], [T|Ts], Rs0, Count0, Acc) ->
@@ -1221,34 +1231,31 @@ live_opt_is([#b_set{op=phi,dst=Dst}=I|Is], Live, Acc) ->
         false ->
             live_opt_is(Is, Live, Acc)
     end;
-live_opt_is([#b_set{op=succeeded,dst=SuccDst=SuccDstVar,
-                    args=[Dst]}=SuccI,
-             #b_set{dst=Dst}=I|Is], Live0, Acc) ->
-    case gb_sets:is_member(Dst, Live0) of
-        true ->
-            Live1 = gb_sets:add(Dst, Live0),
-            Live = gb_sets:delete_any(SuccDst, Live1),
-            live_opt_is([I|Is], Live, [SuccI|Acc]);
-        false ->
-            case live_opt_unused(I) of
-                {replace,NewI0} ->
-                    NewI = NewI0#b_set{dst=SuccDstVar},
-                    live_opt_is([NewI|Is], Live0, Acc);
-                keep ->
-                    case gb_sets:is_member(SuccDst, Live0) of
-                        true ->
-                            Live1 = gb_sets:add(Dst, Live0),
-                            Live = gb_sets:delete(SuccDst, Live1),
-                            live_opt_is([I|Is], Live, [SuccI|Acc]);
-                        false ->
-                            live_opt_is([I|Is], Live0, Acc)
-                    end
-            end
+live_opt_is([#b_set{op=succeeded,dst=SuccDst,args=[MapDst]}=SuccI,
+             #b_set{op=get_map_element,dst=MapDst}=MapI | Is],
+            Live0, Acc) ->
+    case {gb_sets:is_member(SuccDst, Live0),
+          gb_sets:is_member(MapDst, Live0)} of
+        {true, true} ->
+            Live = gb_sets:delete(SuccDst, Live0),
+            live_opt_is([MapI | Is], Live, [SuccI | Acc]);
+        {true, false} ->
+            %% 'get_map_element' is unused; replace 'succeeded' with
+            %% 'has_map_field'
+            NewI = MapI#b_set{op=has_map_field,dst=SuccDst},
+            live_opt_is([NewI | Is], Live0, Acc);
+        {false, true} ->
+            %% 'succeeded' is unused (we know it will succeed); discard it and
+            %% keep 'get_map_element'
+            live_opt_is([MapI | Is], Live0, Acc);
+        {false, false} ->
+            live_opt_is(Is, Live0, Acc)
     end;
 live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) ->
     case gb_sets:is_member(Dst, Live0) of
         true ->
-            Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
+            LiveUsed = gb_sets:from_ordset(beam_ssa:used(I)),
+            Live1 = gb_sets:union(Live0, LiveUsed),
             Live = gb_sets:delete(Dst, Live1),
             live_opt_is(Is, Live, [I|Acc]);
         false ->
@@ -1256,17 +1263,14 @@ live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) ->
                 true ->
                     live_opt_is(Is, Live0, Acc);
                 false ->
-                    Live = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
+                    LiveUsed = gb_sets:from_ordset(beam_ssa:used(I)),
+                    Live = gb_sets:union(Live0, LiveUsed),
                     live_opt_is(Is, Live, [I|Acc])
             end
     end;
 live_opt_is([], Live, Acc) ->
     {Acc,Live}.
 
-live_opt_unused(#b_set{op=get_map_element}=Set) ->
-    {replace,Set#b_set{op=has_map_field}};
-live_opt_unused(_) -> keep.
-
 %%%
 %%% Optimize binary matching.
 %%%
@@ -1777,35 +1781,44 @@ opt_bs_put_split_int_1(Int, L, R) ->
 %%%
 
 ssa_opt_tuple_size({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
-    {Linear,Count} = opt_tup_size(Linear0, Count0, []),
+    %% This optimization is only safe in guards, as prefixing tuple_size with
+    %% an is_tuple check prevents it from throwing an exception.
+    NonGuards = non_guards(Linear0),
+    {Linear,Count} = opt_tup_size(Linear0, NonGuards, Count0, []),
     {St#st{ssa=Linear,cnt=Count}, FuncDb}.
 
-opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], Count0, Acc0) ->
+opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], NonGuards, Count0, Acc0) ->
     case {Is,Last} of
         {[#b_set{op={bif,'=:='},dst=Bool,args=[#b_var{}=Tup,#b_literal{val=Arity}]}],
          #b_br{bool=Bool}} when is_integer(Arity), Arity >= 0 ->
-            {Acc,Count} = opt_tup_size_1(Tup, L, Count0, Acc0),
-            opt_tup_size(Bs, Count, [{L,Blk}|Acc]);
+            {Acc,Count} = opt_tup_size_1(Tup, L, NonGuards, Count0, Acc0),
+            opt_tup_size(Bs, NonGuards, Count, [{L,Blk}|Acc]);
         {_,_} ->
-            opt_tup_size(Bs, Count0, [{L,Blk}|Acc0])
+            opt_tup_size(Bs, NonGuards, Count0, [{L,Blk}|Acc0])
     end;
-opt_tup_size([], Count, Acc) ->
+opt_tup_size([], _NonGuards, Count, Acc) ->
     {reverse(Acc),Count}.
 
-opt_tup_size_1(Size, EqL, Count0, [{L,Blk0}|Acc]) ->
-    case Blk0 of
-        #b_blk{is=Is0,last=#b_br{bool=Bool,succ=EqL,fail=Fail}} ->
-            case opt_tup_size_is(Is0, Bool, Size, []) of
-                none ->
+opt_tup_size_1(Size, EqL, NonGuards, Count0, [{L,Blk0}|Acc]) ->
+    #b_blk{is=Is0,last=Last} = Blk0,
+    case Last of
+        #b_br{bool=Bool,succ=EqL,fail=Fail} ->
+            case gb_sets:is_member(Fail, NonGuards) of
+                true ->
                     {[{L,Blk0}|Acc],Count0};
-                {PreIs,TupleSizeIs,Tuple} ->
-                    opt_tup_size_2(PreIs, TupleSizeIs, L, EqL,
-                                   Tuple, Fail, Count0, Acc)
+                false ->
+                    case opt_tup_size_is(Is0, Bool, Size, []) of
+                        none ->
+                            {[{L,Blk0}|Acc],Count0};
+                        {PreIs,TupleSizeIs,Tuple} ->
+                            opt_tup_size_2(PreIs, TupleSizeIs, L, EqL,
+                                           Tuple, Fail, Count0, Acc)
+                    end
             end;
-        #b_blk{} ->
+        _ ->
             {[{L,Blk0}|Acc],Count0}
     end;
-opt_tup_size_1(_, _, Count, Acc) ->
+opt_tup_size_1(_, _, _, Count, Acc) ->
     {Acc,Count}.
 
 opt_tup_size_2(PreIs, TupleSizeIs, PreL, EqL, Tuple, Fail, Count0, Acc) ->
@@ -1943,12 +1956,28 @@ verify_merge_is(_) ->
 
 is_merge_allowed(_, #b_blk{}, #b_blk{is=[#b_set{op=peek_message}|_]}) ->
     false;
-is_merge_allowed(L, #b_blk{last=#b_br{}}=Blk, #b_blk{}) ->
+is_merge_allowed(_, #b_blk{}, #b_blk{is=[#b_set{op=exception_trampoline}|_]}) ->
+    false;
+is_merge_allowed(_, #b_blk{is=[#b_set{op=exception_trampoline}|_]}, #b_blk{}) ->
+    false;
+is_merge_allowed(L, #b_blk{last=#b_br{}}=Blk, #b_blk{is=Is}) ->
     %% The predecessor block must have exactly one successor (L) for
     %% the merge to be safe.
     case beam_ssa:successors(Blk) of
-        [L] -> true;
-        [_|_] -> false
+        [L] ->
+            case Is of
+                [#b_set{op=phi,args=[_]}|_] ->
+                    %% The type optimizer pass must have been
+                    %% turned off, since it would have removed this
+                    %% redundant phi node. Refuse to merge the blocks
+                    %% to ensure that this phi node remains at the
+                    %% beginning of a block.
+                    false;
+                _ ->
+                    true
+            end;
+        [_|_] ->
+            false
     end;
 is_merge_allowed(_, #b_blk{last=#b_switch{}}, #b_blk{}) ->
     false.
@@ -1969,9 +1998,7 @@ is_merge_allowed(_, #b_blk{last=#b_switch{}}, #b_blk{}) ->
 %%% extracted values.
 %%%
 
-ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) ->
-    Linear = beam_ssa:linearize(Blocks0),
-
+ssa_opt_sink({#st{ssa=Linear}=St, FuncDb}) ->
     %% Create a map with all variables that define get_tuple_element
     %% instructions. The variable name map to the block it is defined in.
     case def_blocks(Linear) of
@@ -1980,10 +2007,12 @@ ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) ->
             {St, FuncDb};
         [_|_]=Defs0 ->
             Defs = maps:from_list(Defs0),
-            {do_ssa_opt_sink(Linear, Defs, St), FuncDb}
+            {do_ssa_opt_sink(Defs, St), FuncDb}
     end.
 
-do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) ->
+do_ssa_opt_sink(Defs, #st{ssa=Linear}=St) ->
+    Blocks0 = maps:from_list(Linear),
+
     %% Now find all the blocks that use variables defined by get_tuple_element
     %% instructions.
     Used = used_blocks(Linear, Defs, []),
@@ -2008,7 +2037,8 @@ do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) ->
                            From = map_get(V, Defs),
                            move_defs(V, From, To, A)
                    end, Blocks0, DefLoc),
-    St#st{ssa=Blocks}.
+
+    St#st{ssa=beam_ssa:linearize(Blocks)}.
 
 def_blocks([{L,#b_blk{is=Is}}|Bs]) ->
     def_blocks_is(Is, L, def_blocks(Bs));
@@ -2041,6 +2071,7 @@ unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs]) ->
     Unsuitable = case Op of
                      bs_extract -> true;
                      bs_put -> true;
+                     exception_trampoline -> true;
                      {float,_} -> true;
                      landingpad -> true;
                      peek_message -> true;
@@ -2244,6 +2275,21 @@ gcd(A, B) ->
         X -> gcd(B, X)
     end.
 
+non_guards(Linear) ->
+    gb_sets:from_list(non_guards_1(Linear)).
+
+non_guards_1([{L,#b_blk{is=Is}}|Bs]) ->
+    case Is of
+        [#b_set{op=exception_trampoline}|_] ->
+            [L | non_guards_1(Bs)];
+        [#b_set{op=landingpad}|_] ->
+            [L | non_guards_1(Bs)];
+        _ ->
+            non_guards_1(Bs)
+    end;
+non_guards_1([]) ->
+    [?EXCEPTION_BLOCK].
+
 rel2fam(S0) ->
     S1 = sofs:relation(S0),
     S = sofs:rel2fam(S1),
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index a5fcb91cc0..89053c7b9f 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -120,6 +120,7 @@ passes(Opts) ->
 
           %% Preliminaries.
           ?PASS(fix_bs),
+          ?PASS(exception_trampolines),
           ?PASS(sanitize),
           ?PASS(match_fail_instructions),
           case FixTuples of
@@ -158,7 +159,9 @@ passes(Opts) ->
           %% Allocate registers.
           ?PASS(linear_scan),
           ?PASS(frame_size),
-          ?PASS(turn_yregs)],
+          ?PASS(turn_yregs),
+
+          ?PASS(assert_no_critical_edges)],
     [P || P <- Ps, P =/= ignore].
 
 function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
@@ -693,6 +696,44 @@ legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
 legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
     {reverse(Acc),Count,Copies}.
 
+%% exception_trampolines(St0) -> St.
+%%
+%% Removes the "exception trampolines" that were added to prevent exceptions
+%% from being optimized away.
+
+exception_trampolines(#st{ssa=Blocks0}=St) ->
+    RPO = reverse(beam_ssa:rpo(Blocks0)),
+    Blocks = et_1(RPO, #{}, Blocks0),
+    St#st{ssa=Blocks}.
+
+et_1([L | Ls], Trampolines, Blocks) ->
+    #{ L := #b_blk{is=Is,last=Last0}=Block0 } = Blocks,
+    case {Is, Last0} of
+        {[#b_set{op=exception_trampoline}], #b_br{succ=Succ}} ->
+            et_1(Ls, Trampolines#{ L => Succ }, maps:remove(L, Blocks));
+        {_, #b_br{succ=Same,fail=Same}} when Same =:= ?EXCEPTION_BLOCK ->
+            %% The exception block is just a marker saying that we should raise
+            %% an exception (= {f,0}) instead of jumping to a particular fail
+            %% block. Since it's not a reachable block we can't allow
+            %% unconditional jumps to it except through a trampoline.
+            error({illegal_jump_to_exception_block, L});
+        {_, #b_br{succ=Succ0,fail=Fail0}} ->
+            Succ = maps:get(Succ0, Trampolines, Succ0),
+            Fail = maps:get(Fail0, Trampolines, Fail0),
+            if
+                Succ =/= Succ0; Fail =/= Fail0 ->
+                    Last = Last0#b_br{succ=Succ,fail=Fail},
+                    Block = Block0#b_blk{last=Last},
+                    et_1(Ls, Trampolines, Blocks#{ L := Block });
+                Succ =:= Succ0, Fail =:= Fail0 ->
+                    et_1(Ls, Trampolines, Blocks)
+            end;
+        {_, _} ->
+             et_1(Ls, Trampolines, Blocks)
+    end;
+et_1([], _Trampolines, Blocks) ->
+    Blocks.
+
 %% sanitize(St0) -> St.
 %%  Remove constructs that can cause problems later:
 %%
@@ -1299,10 +1340,10 @@ place_frame_here(L, Blocks, Doms, Frames) ->
             Descendants = beam_ssa:rpo([L], Blocks),
             PhiPredecessors = phi_predecessors(L, Blocks),
             MustDominate = ordsets:from_list(PhiPredecessors ++ Descendants),
-            Dominates = all(fun(?BADARG_BLOCK) ->
+            Dominates = all(fun(?EXCEPTION_BLOCK) ->
                                     %% This block defines no variables and calls
                                     %% erlang:error(badarg). It does not matter
-                                    %% whether L dominates ?BADARG_BLOCK or not;
+                                    %% whether L dominates ?EXCEPTION_BLOCK or not;
                                     %% it is still safe to put the frame in L.
                                     true;
                                (Bl) ->
@@ -1433,10 +1474,11 @@ fix_receives_1([{L,Blk}|Ls], Blocks0, Count0) ->
             LoopExit = find_loop_exit(Rm, Blocks0),
             Defs0 = beam_ssa:def([L], Blocks0),
             CommonUsed = recv_common(Defs0, LoopExit, Blocks0),
-            {Blocks1,Count1} = recv_fix_common(CommonUsed, LoopExit, Rm,
-                                               Blocks0, Count0),
+            {Blocks1,Count1} = recv_crit_edges(Rm, LoopExit, Blocks0, Count0),
+            {Blocks2,Count2} = recv_fix_common(CommonUsed, LoopExit, Rm,
+                                               Blocks1, Count1),
             Defs = ordsets:subtract(Defs0, CommonUsed),
-            {Blocks,Count} = fix_receive(Rm, Defs, Blocks1, Count1),
+            {Blocks,Count} = fix_receive(Rm, Defs, Blocks2, Count2),
             fix_receives_1(Ls, Blocks, Count);
         #b_blk{} ->
             fix_receives_1(Ls, Blocks0, Count0)
@@ -1449,9 +1491,60 @@ recv_common(_Defs, none, _Blocks) ->
     %% in the tail position of a function.
     [];
 recv_common(Defs, Exit, Blocks) ->
-    {ExitDefs,ExitUsed} = beam_ssa:def_used([Exit], Blocks),
+    {ExitDefs,ExitUnused} = beam_ssa:def_unused([Exit], Defs, Blocks),
     Def = ordsets:subtract(Defs, ExitDefs),
-    ordsets:intersection(Def, ExitUsed).
+    ordsets:subtract(Def, ExitUnused).
+
+%% recv_crit_edges([RemoveMessageLabel], LoopExit,
+%%                 Blocks0, Count0) -> {Blocks,Count}.
+%%
+%%  Adds dummy blocks on all conditional jumps to the exit block so that
+%%  recv_fix_common/5 can insert phi nodes without having to worry about
+%%  critical edges.
+
+recv_crit_edges(_Rms, none, Blocks0, Count0) ->
+    {Blocks0, Count0};
+recv_crit_edges(Rms, Exit, Blocks0, Count0) ->
+    Ls = beam_ssa:rpo(Rms, Blocks0),
+    rce_insert_edges(Ls, Exit, Count0, Blocks0).
+
+rce_insert_edges([L | Ls], Exit, Count0, Blocks0) ->
+    Successors = beam_ssa:successors(map_get(L, Blocks0)),
+    case member(Exit, Successors) of
+        true when Successors =/= [Exit] ->
+            {Blocks, Count} = rce_insert_edge(L, Exit, Count0, Blocks0),
+            rce_insert_edges(Ls, Exit, Count, Blocks);
+        _ ->
+            rce_insert_edges(Ls, Exit, Count0, Blocks0)
+    end;
+rce_insert_edges([], _Exit, Count, Blocks) ->
+    {Blocks, Count}.
+
+rce_insert_edge(L, Exit, Count, Blocks0) ->
+    #b_blk{last=Last0} = FromBlk0 = map_get(L, Blocks0),
+
+    ToExit = #b_br{bool=#b_literal{val=true},succ=Exit,fail=Exit},
+
+    FromBlk = FromBlk0#b_blk{last=rce_reroute_terminator(Last0, Exit, Count)},
+    EdgeBlk = #b_blk{anno=#{},is=[],last=ToExit},
+
+    Blocks = Blocks0#{ Count => EdgeBlk, L => FromBlk },
+    {Blocks, Count + 1}.
+
+rce_reroute_terminator(#b_br{succ=Exit}=Last, Exit, New) ->
+    rce_reroute_terminator(Last#b_br{succ=New}, Exit, New);
+rce_reroute_terminator(#b_br{fail=Exit}=Last, Exit, New) ->
+    rce_reroute_terminator(Last#b_br{fail=New}, Exit, New);
+rce_reroute_terminator(#b_br{}=Last, _Exit, _New) ->
+    Last;
+rce_reroute_terminator(#b_switch{fail=Exit}=Last, Exit, New) ->
+    rce_reroute_terminator(Last#b_switch{fail=New}, Exit, New);
+rce_reroute_terminator(#b_switch{list=List0}=Last, Exit, New) ->
+    List = [if
+                Lbl =:= Exit -> {Arg, New};
+                Lbl =/= Exit -> {Arg, Lbl}
+            end || {Arg, Lbl} <- List0],
+    Last#b_switch{list=List}.
 
 %% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
 %%                 Blocks0, Count0) -> {Blocks,Count}.
@@ -1505,9 +1598,9 @@ exit_predecessors([], _Exit, _Blocks) -> [].
 %%  later used within a clause of the receive.
 
 fix_receive([L|Ls], Defs, Blocks0, Count0) ->
-    {RmDefs,Used0} = beam_ssa:def_used([L], Blocks0),
+    {RmDefs,Unused} = beam_ssa:def_unused([L], Defs, Blocks0),
     Def = ordsets:subtract(Defs, RmDefs),
-    Used = ordsets:intersection(Def, Used0),
+    Used = ordsets:subtract(Def, Unused),
     {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Used], Count0),
     Ren = zip(Used, NewVars),
     Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
@@ -1530,10 +1623,14 @@ find_loop_exit([L1,L2|_Ls], Blocks) ->
     find_loop_exit_1(Path1, cerl_sets:from_list(Path2));
 find_loop_exit(_, _) -> none.
 
+find_loop_exit_1([?EXCEPTION_BLOCK | T], OtherPath) ->
+    %% ?EXCEPTION_BLOCK is a marker and not an actual block, so we can't
+    %% consider it to be a common block even if both paths cross it.
+    find_loop_exit_1(T, OtherPath);
 find_loop_exit_1([H|T], OtherPath) ->
     case cerl_sets:is_element(H, OtherPath) of
         true -> H;
-        false ->  find_loop_exit_1(T, OtherPath)
+        false -> find_loop_exit_1(T, OtherPath)
     end;
 find_loop_exit_1([], _) -> none.
 
@@ -1784,7 +1881,7 @@ collect_yregs([], Yregs) -> Yregs.
 copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
     #b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0),
     RC = case {Last,Ls} of
-             {#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} ->
+             {#b_br{succ=Succ,fail=?EXCEPTION_BLOCK},[Succ|_]} ->
                  true;
              {_,_} ->
                  false
@@ -2133,8 +2230,8 @@ reserve_yregs(#st{frames=Frames}=St0) ->
 reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
     Blk = map_get(L, Blocks0),
     Yregs = beam_ssa:get_anno(yregs, Blk),
-    {Def,Used} = beam_ssa:def_used([L], Blocks0),
-    UsedYregs = ordsets:intersection(Yregs, Used),
+    {Def,Unused} = beam_ssa:def_unused([L], Yregs, Blocks0),
+    UsedYregs = ordsets:subtract(Yregs, Unused),
     DefBefore = ordsets:subtract(UsedYregs, Def),
     {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, Blocks0, Count0),
     InsideVars = ordsets:subtract(UsedYregs, DefBefore),
@@ -2523,9 +2620,9 @@ reserve_xregs_is([], Res, Xs, _Used) ->
     {Res,Xs}.
 
 %% Pick up register hints from the successors of this blocks.
-reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?BADARG_BLOCK},
+reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?EXCEPTION_BLOCK},
                    _Blocks, XsMap, _Res) ->
-    %% We know that no variables are used at ?BADARG_BLOCK, so
+    %% We know that no variables are used at ?EXCEPTION_BLOCK, so
     %% any register hints from the success blocks are safe to use.
     map_get(Succ, XsMap);
 reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail},
diff --git a/lib/compiler/src/beam_ssa_share.erl b/lib/compiler/src/beam_ssa_share.erl
index 426efa2cc9..fa728992f8 100644
--- a/lib/compiler/src/beam_ssa_share.erl
+++ b/lib/compiler/src/beam_ssa_share.erl
@@ -117,8 +117,8 @@ share_terminator(_Last, _Blocks) -> none.
 %% possible if the blocks are not equivalent, as that is the common
 %% case.
 
-are_equivalent(_Succ, _, ?BADARG_BLOCK, _, _Blocks) ->
-    %% ?BADARG_BLOCK is special. Sharing could be incorrect.
+are_equivalent(_Succ, _, ?EXCEPTION_BLOCK, _, _Blocks) ->
+    %% ?EXCEPTION_BLOCK is special. Sharing could be incorrect.
     false;
 are_equivalent(_Succ, #b_blk{is=Is1,last=#b_ret{arg=RetVal1}=Ret1},
          _Fail, #b_blk{is=Is2,last=#b_ret{arg=RetVal2}=Ret2}, _Blocks) ->
@@ -303,8 +303,12 @@ canonical_is([#b_ret{arg=Arg}], VarMap, Acc0) ->
                    Acc0
            end,
     {{ret,canonical_arg(Arg, VarMap),Acc1},VarMap};
-canonical_is([#b_br{bool=#b_var{},fail=Fail}], VarMap, Acc) ->
-    {{br,succ,Fail,Acc},VarMap};
+canonical_is([#b_br{bool=#b_var{}=Arg,fail=Fail}], VarMap, Acc) ->
+    %% A previous buggy version of this code omitted the canonicalized
+    %% argument in the return value. Unfortunately, that worked most
+    %% of the time, except when `br` terminator referenced a variable
+    %% defined in a previous block instead of in the same block.
+    {{br,canonical_arg(Arg, VarMap),succ,Fail,Acc},VarMap};
 canonical_is([#b_br{succ=Succ}], VarMap, Acc) ->
     {{br,Succ,Acc},VarMap};
 canonical_is([], VarMap, Acc) ->
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
index 79c67e5705..0912ecb2c2 100644
--- a/lib/compiler/src/beam_ssa_type.erl
+++ b/lib/compiler/src/beam_ssa_type.erl
@@ -84,7 +84,8 @@ join_arg_types(Args, ArgTypes, Anno) ->
               end, Ts0, ParamTypes).
 
 join_arg_types_1([Arg | Args], [TM | TMs], Ts) when map_size(TM) =/= 0 ->
-    join_arg_types_1(Args, TMs, Ts#{ Arg => beam_types:join(maps:values(TM))});
+    Type = beam_types:join(maps:values(TM)),
+    join_arg_types_1(Args, TMs, Ts#{ Arg => Type });
 join_arg_types_1([Arg | Args], [_TM | TMs], Ts) ->
     join_arg_types_1(Args, TMs, Ts#{ Arg => any });
 join_arg_types_1([], [], Ts) ->
@@ -108,7 +109,7 @@ opt_continue_1(Linear0, Args, Id, Ts, FuncDb0) ->
     D = #d{ func_db=FuncDb0,
             func_id=Id,
             ds=Defs,
-            ls=#{0=>Ts,?BADARG_BLOCK=>#{}},
+            ls=#{0=>Ts,?EXCEPTION_BLOCK=>#{}},
             once=UsedOnce },
 
     {Linear, FuncDb, NewRet} = opt(Linear0, D, []),
@@ -174,8 +175,8 @@ opt_1(L, #b_blk{is=Is0,last=Last0}=Blk0, Bs, Ts0,
         {Is,Ts,Ds,Fdb,Sub} ->
             D1 = D0#d{ds=Ds,sub=Sub,func_db=Fdb},
             Last1 = simplify_terminator(Last0, Sub, Ts, Ds),
-            Last = opt_terminator(Last1, Ts, Ds),
-            D = update_successors(Last, Ts, D1),
+            Last2 = opt_terminator(Last1, Ts, Ds),
+            {Last,D} = update_successors(Last2, Ts, D1),
             Blk = Blk0#b_blk{is=Is,last=Last},
             opt(Bs, D, [{L,Blk}|Acc]);
         {no_return,Ret,Is,Ds,Fdb,Sub} ->
@@ -388,9 +389,9 @@ opt_call(#b_set{dst=Dst,args=[#b_local{}=Callee|Args]}=I0, D, Ts0, Ds0, Fdb0) ->
             %% Match contexts are treated as bitstrings when optimizing
             %% arguments, as we don't yet support removing the
             %% "bs_start_match3" instruction.
-            Types = [case get_type(Arg, Ts) of
-                          #t_bs_context{} -> #t_bitstring{};
-                          Type -> Type
+            Types = [case raw_type(Arg, Ts) of
+                         #t_bs_context{} -> #t_bitstring{};
+                         Type -> Type
                      end || Arg <- Args],
 
             %% Update the argument types of *this exact call*, the types
@@ -443,7 +444,7 @@ opt_make_fun(#b_set{op=make_fun,
         #{ Callee := #func_info{exported=false,arg_types=ArgTypes0}=Info } ->
             ArgCount = Callee#b_local.arity - length(FreeVars),
 
-            FVTypes = [get_type(FreeVar, Ts) || FreeVar <- FreeVars],
+            FVTypes = [raw_type(FreeVar, Ts) || FreeVar <- FreeVars],
             Types = duplicate(ArgCount, any) ++ FVTypes,
 
             CallId = {D#d.func_id, Dst},
@@ -486,8 +487,10 @@ simplify(#b_set{op={bif,'or'},args=Args}=I, Ts) ->
             I
     end;
 simplify(#b_set{op={bif,element},args=[#b_literal{val=Index},Tuple]}=I0, Ts) ->
-    case beam_types:get_tuple_size(get_type(Tuple, Ts)) of
-        {_,Size} when is_integer(Index), 1 =< Index, Index =< Size ->
+    case normalized_type(Tuple, Ts) of
+        #t_tuple{size=Size} when is_integer(Index),
+                                 1 =< Index,
+                                 Index =< Size ->
             I = I0#b_set{op=get_tuple_element,
                          args=[Tuple,#b_literal{val=Index-1}]},
             simplify(I, Ts);
@@ -495,28 +498,28 @@ simplify(#b_set{op={bif,element},args=[#b_literal{val=Index},Tuple]}=I0, Ts) ->
             eval_bif(I0, Ts)
     end;
 simplify(#b_set{op={bif,hd},args=[List]}=I, Ts) ->
-    case get_type(List, Ts) of
+    case normalized_type(List, Ts) of
         cons ->
             I#b_set{op=get_hd};
         _ ->
             eval_bif(I, Ts)
     end;
 simplify(#b_set{op={bif,tl},args=[List]}=I, Ts) ->
-    case get_type(List, Ts) of
+    case normalized_type(List, Ts) of
         cons ->
             I#b_set{op=get_tl};
         _ ->
             eval_bif(I, Ts)
     end;
 simplify(#b_set{op={bif,size},args=[Term]}=I, Ts) ->
-    case get_type(Term, Ts) of
+    case normalized_type(Term, Ts) of
         #t_tuple{} ->
             simplify(I#b_set{op={bif,tuple_size}}, Ts);
         _ ->
             eval_bif(I, Ts)
     end;
 simplify(#b_set{op={bif,tuple_size},args=[Term]}=I, Ts) ->
-    case get_type(Term, Ts) of
+    case normalized_type(Term, Ts) of
         #t_tuple{size=Size,exact=true} ->
             #b_literal{val=Size};
         _ ->
@@ -524,7 +527,7 @@ simplify(#b_set{op={bif,tuple_size},args=[Term]}=I, Ts) ->
     end;
 simplify(#b_set{op={bif,is_function},args=[Fun,#b_literal{val=Arity}]}=I, Ts)
   when is_integer(Arity), Arity >= 0 ->
-    case get_type(Fun, Ts) of
+    case normalized_type(Fun, Ts) of
         #t_fun{arity=any} ->
             I;
         #t_fun{arity=Arity} ->
@@ -535,15 +538,15 @@ simplify(#b_set{op={bif,is_function},args=[Fun,#b_literal{val=Arity}]}=I, Ts)
             #b_literal{val=false}
     end;
 simplify(#b_set{op={bif,Op0},args=Args}=I, Ts) when Op0 =:= '=='; Op0 =:= '/=' ->
-    Types = get_types(Args, Ts),
+    Types = normalized_types(Args, Ts),
     EqEq0 = case {beam_types:meet(Types),beam_types:join(Types)} of
-               {none,any} -> true;
-               {#t_integer{},#t_integer{}} -> true;
-               {float,float} -> true;
-               {#t_bitstring{},_} -> true;
-               {#t_atom{},_} -> true;
-               {_,_} -> false
-           end,
+                {none,any} -> true;
+                {#t_integer{},#t_integer{}} -> true;
+                {float,float} -> true;
+                {#t_bitstring{},_} -> true;
+                {#t_atom{},_} -> true;
+                {_,_} -> false
+            end,
     EqEq = EqEq0 orelse any_non_numeric_argument(Args, Ts),
     case EqEq of
         true ->
@@ -557,33 +560,35 @@ simplify(#b_set{op={bif,Op0},args=Args}=I, Ts) when Op0 =:= '=='; Op0 =:= '/=' -
     end;
 simplify(#b_set{op={bif,'=:='},args=[Same,Same]}, _Ts) ->
     #b_literal{val=true};
-simplify(#b_set{op={bif,'=:='},args=[A1,_A2]=Args}=I, Ts) ->
-    [T1,T2] = get_types(Args, Ts),
-    case beam_types:meet(T1, T2) of
+simplify(#b_set{op={bif,'=:='},args=[LHS,RHS]}=I, Ts) ->
+    LType = raw_type(LHS, Ts),
+    RType = raw_type(RHS, Ts),
+    case beam_types:meet(LType, RType) of
         none ->
             #b_literal{val=false};
         _ ->
-            case {beam_types:is_boolean_type(T1),T2} of
+            case {beam_types:is_boolean_type(LType),
+                  beam_types:normalize(RType)} of
                 {true,#t_atom{elements=[true]}} ->
                     %% Bool =:= true  ==>  Bool
-                    A1;
+                    LHS;
                 {true,#t_atom{elements=[false]}} ->
                     %% Bool =:= false ==> not Bool
                     %%
                     %% This will be further optimized to eliminate the
                     %% 'not', swapping the success and failure
-                    %% branches in the br instruction. If A1 comes
+                    %% branches in the br instruction. If LHS comes
                     %% from a type test (such as is_atom/1) or a
                     %% comparison operator (such as >=) that can be
                     %% translated to test instruction, this
                     %% optimization will eliminate one instruction.
-                    simplify(I#b_set{op={bif,'not'},args=[A1]}, Ts);
+                    simplify(I#b_set{op={bif,'not'},args=[LHS]}, Ts);
                 {_,_} ->
                     eval_bif(I, Ts)
             end
     end;
 simplify(#b_set{op={bif,Op},args=Args}=I, Ts) ->
-    Types = get_types(Args, Ts),
+    Types = normalized_types(Args, Ts),
     case is_float_op(Op, Types) of
         false ->
             eval_bif(I, Ts);
@@ -592,7 +597,7 @@ simplify(#b_set{op={bif,Op},args=Args}=I, Ts) ->
             eval_bif(beam_ssa:add_anno(float_op, AnnoArgs, I), Ts)
     end;
 simplify(#b_set{op=get_tuple_element,args=[Tuple,#b_literal{val=N}]}=I, Ts) ->
-    case get_type(Tuple, Ts) of
+    case normalized_type(Tuple, Ts) of
         #t_tuple{size=Size,elements=Es} when Size > N ->
             ElemType = beam_types:get_element_type(N + 1, Es),
             case beam_types:get_singleton_value(ElemType) of
@@ -605,7 +610,7 @@ simplify(#b_set{op=get_tuple_element,args=[Tuple,#b_literal{val=N}]}=I, Ts) ->
             I
     end;
 simplify(#b_set{op=is_nonempty_list,args=[Src]}=I, Ts) ->
-    case get_type(Src, Ts) of
+    case normalized_type(Src, Ts) of
         any ->  I;
         list -> I;
         cons -> #b_literal{val=true};
@@ -613,7 +618,7 @@ simplify(#b_set{op=is_nonempty_list,args=[Src]}=I, Ts) ->
     end;
 simplify(#b_set{op=is_tagged_tuple,
                 args=[Src,#b_literal{val=Size},#b_literal{}=Tag]}=I, Ts) ->
-    simplify_is_record(I, get_type(Src, Ts), Size, Tag, Ts);
+    simplify_is_record(I, normalized_type(Src, Ts), Size, Tag, Ts);
 simplify(#b_set{op=put_list,args=[#b_literal{val=H},
                                   #b_literal{val=T}]}, _Ts) ->
     #b_literal{val=[H|T]};
@@ -631,7 +636,7 @@ simplify(I, _Ts) -> I.
 any_non_numeric_argument([#b_literal{val=Lit}|_], _Ts) ->
     is_non_numeric(Lit);
 any_non_numeric_argument([#b_var{}=V|T], Ts) ->
-    is_non_numeric_type(get_type(V, Ts)) orelse any_non_numeric_argument(T, Ts);
+    is_non_numeric_type(raw_type(V, Ts)) orelse any_non_numeric_argument(T, Ts);
 any_non_numeric_argument([], _Ts) -> false.
 
 is_non_numeric([H|T]) ->
@@ -649,7 +654,7 @@ is_non_numeric(_) -> true.
 
 is_non_numeric_tuple(Tuple, El) when El >= 1 ->
     is_non_numeric(element(El, Tuple)) andalso
-	is_non_numeric_tuple(Tuple, El-1);
+        is_non_numeric_tuple(Tuple, El-1);
 is_non_numeric_tuple(_Tuple, 0) -> true.
 
 is_non_numeric_type(#t_atom{}) -> true;
@@ -676,9 +681,11 @@ make_literal_list([_|_], _) ->
 make_literal_list([], Acc) ->
     reverse(Acc).
 
-is_safe_bool_op(Args, Ts) ->
-    [T1,T2] = get_types(Args, Ts),
-    beam_types:is_boolean_type(T1) andalso beam_types:is_boolean_type(T2).
+is_safe_bool_op([LHS, RHS], Ts) ->
+    LType = raw_type(LHS, Ts),
+    RType = raw_type(RHS, Ts),
+    beam_types:is_boolean_type(LType) andalso
+        beam_types:is_boolean_type(RType).
 
 all_same([{H,_}|T]) ->
     all(fun({E,_}) -> E =:= H end, T).
@@ -691,7 +698,7 @@ eval_bif(#b_set{op={bif,Bif},args=Args}=I, Ts) ->
         true ->
             case make_literal_list(Args) of
                 none ->
-                    case get_types(Args, Ts) of
+                    case normalized_types(Args, Ts) of
                         [any] ->
                             I;
                         [Type] ->
@@ -724,8 +731,7 @@ simplify_arg(#b_var{}=Arg0, Sub, Ts) ->
         #b_literal{}=LitArg ->
             LitArg;
         #b_var{}=Arg ->
-            Type = get_type(Arg, Ts),
-            case beam_types:get_singleton_value(Type) of
+            case beam_types:get_singleton_value(raw_type(Arg, Ts)) of
                 {ok, Val} -> #b_literal{val=Val};
                 error -> Arg
             end
@@ -766,7 +772,7 @@ opt_terminator(#b_br{bool=#b_var{}}=Br, Ts, Ds) ->
 opt_terminator(#b_switch{arg=#b_literal{}}=Sw, _Ts, _Ds) ->
     beam_ssa:normalize(Sw);
 opt_terminator(#b_switch{arg=#b_var{}=V}=Sw, Ts, Ds) ->
-    case get_type(V, Ts) of
+    case normalized_type(V, Ts) of
         any ->
             beam_ssa:normalize(Sw);
         Type ->
@@ -796,7 +802,7 @@ opt_switch(#b_switch{fail=Fail,list=List0}=Sw0, Type, Ts, Ds) ->
 prune_switch_list([{_,Fail}|T], Fail, Type, Ts) ->
     prune_switch_list(T, Fail, Type, Ts);
 prune_switch_list([{Arg,_}=Pair|T], Fail, Type, Ts) ->
-    case beam_types:meet(get_type(Arg, Ts), Type) of
+    case beam_types:meet(raw_type(Arg, Ts), Type) of
         none ->
             %% Different types. This value can never match.
             prune_switch_list(T, Fail, Type, Ts);
@@ -805,82 +811,91 @@ prune_switch_list([{Arg,_}=Pair|T], Fail, Type, Ts) ->
     end;
 prune_switch_list([], _, _, _) -> [].
 
-update_successors(#b_br{bool=#b_literal{val=true},succ=S}, Ts, D) ->
-    update_successor(S, Ts, D);
-update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts0, D0) ->
-    case cerl_sets:is_element(Bool, D0#d.once) of
-        true ->
-            %% This variable is defined in this block and is only
-            %% referenced by this br terminator. Therefore, there is
-            %% no need to include it in the type database passed on to
-            %% the successors of this block.
-            Ts = maps:remove(Bool, Ts0),
-            {SuccTs,FailTs} = infer_types_br(Bool, Ts, D0),
-            D = update_successor(Fail, FailTs, D0),
-            update_successor(Succ, SuccTs, D);
-        false ->
-            {SuccTs,FailTs} = infer_types_br(Bool, Ts0, D0),
-            D = update_successor_bool(Bool, false, Fail, FailTs, D0),
-            update_successor_bool(Bool, true, Succ, SuccTs, D)
-    end;
-update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts, D0) ->
-    case cerl_sets:is_element(V, D0#d.once) of
-        true ->
-            %% This variable is defined in this block and is only
-            %% referenced by this switch terminator. Therefore, there is
-            %% no need to include it in the type database passed on to
-            %% the successors of this block.
-            D = update_successor(Fail, Ts, D0),
-            F = fun({Val,S}, A) ->
-                        SuccTs0 = infer_types_switch(V, Val, Ts, D),
-                        SuccTs = maps:remove(V, SuccTs0),
-                        update_successor(S, SuccTs, A)
-                end,
-            foldl(F, D, List);
-        false ->
-            %% V can not be equal to any of the values in List at the fail
-            %% block.
-            FailTs = subtract_sw_list(V, List, Ts),
-            D = update_successor(Fail, FailTs, D0),
-            F = fun({Val,S}, A) ->
-                        SuccTs = infer_types_switch(V, Val, Ts, D),
-                        update_successor(S, SuccTs, A)
-                end,
-            foldl(F, D, List)
+update_successors(#b_br{bool=#b_literal{val=true},succ=Succ}=Last, Ts, D0) ->
+    {Last, update_successor(Succ, Ts, D0)};
+update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}=Last0,
+                  Ts, D0) ->
+    UsedOnce = cerl_sets:is_element(Bool, D0#d.once),
+    case infer_types_br(Bool, Ts, UsedOnce, D0) of
+        {#{}=SuccTs, #{}=FailTs} ->
+            D1 = update_successor(Succ, SuccTs, D0),
+            D = update_successor(Fail, FailTs, D1),
+            {Last0, D};
+        {#{}=SuccTs, none} ->
+            Last = Last0#b_br{bool=#b_literal{val=true},fail=Succ},
+            {Last, update_successor(Succ, SuccTs, D0)};
+        {none, #{}=FailTs} ->
+            Last = Last0#b_br{bool=#b_literal{val=true},succ=Fail},
+            {Last, update_successor(Fail, FailTs, D0)}
     end;
-update_successors(#b_ret{arg=Arg}, Ts, D) ->
-    FuncId = D#d.func_id,
-    case D#d.ds of
-        #{ Arg := #b_set{op=call,args=[FuncId | _]} } ->
-            %% Returning a call to ourselves doesn't affect our own return
-            %% type.
-            D;
+update_successors(#b_switch{arg=#b_var{}=V,fail=Fail0,list=List0}=Last0,
+                  Ts, D0) ->
+    UsedOnce = cerl_sets:is_element(V, D0#d.once),
+
+    {List1, D1} = update_switch(List0, V, Ts, UsedOnce, [], D0),
+    FailTs = update_switch_failure(V, List0, Ts, UsedOnce, D1),
+
+    case FailTs of
+        none ->
+            %% The fail block is unreachable; swap it with one of the choices.
+            [{_, Fail} | List] = List1,
+            Last = Last0#b_switch{fail=Fail,list=List},
+            {Last, D1};
         #{} ->
-            RetType = beam_types:join([get_type(Arg, Ts) | D#d.ret_type]),
-            D#d{ret_type=[RetType]}
-    end.
+            D = update_successor(Fail0, FailTs, D1),
+            Last = Last0#b_switch{list=List1},
+            {Last, D}
+    end;
+update_successors(#b_ret{arg=Arg}=Last, Ts, D0) ->
+    FuncId = D0#d.func_id,
+    D = case D0#d.ds of
+            #{ Arg := #b_set{op=call,args=[FuncId | _]} } ->
+                %% Returning a call to ourselves doesn't affect our own return
+                %% type.
+                D0;
+            #{} ->
+                RetType = beam_types:join([raw_type(Arg, Ts) | D0#d.ret_type]),
+                D0#d{ret_type=[RetType]}
+        end,
+    {Last, D}.
 
-subtract_sw_list(V, List, Ts) ->
-    Ts#{ V := sub_sw_list_1(get_type(V, Ts), List, Ts) }.
+update_switch([{Val, Lbl}=Sw | List], V, Ts, UsedOnce, Acc, D0) ->
+    case infer_types_switch(V, Val, Ts, UsedOnce, D0) of
+        none ->
+            update_switch(List, V, Ts, UsedOnce, Acc, D0);
+        SwTs ->
+            D = update_successor(Lbl, SwTs, D0),
+            update_switch(List, V, Ts, UsedOnce, [Sw | Acc], D)
+    end;
+update_switch([], _V, _Ts, _UsedOnce, Acc, D) ->
+    {reverse(Acc), D}.
+
+update_switch_failure(V, List, Ts, UsedOnce, D) ->
+    case sub_sw_list_1(raw_type(V, Ts), List, Ts) of
+        none ->
+            none;
+        FailType ->
+            case beam_types:get_singleton_value(FailType) of
+                {ok, Value} ->
+                    %% This is the only possible value at the fail label, so we
+                    %% can infer types as if we matched it directly.
+                    Lit = #b_literal{val=Value},
+                    infer_types_switch(V, Lit, Ts, UsedOnce, D);
+                error when UsedOnce ->
+                    ts_remove_var(V, Ts);
+                error ->
+                    Ts
+            end
+    end.
 
 sub_sw_list_1(Type, [{Val,_}|T], Ts) ->
-    ValType = get_type(Val, Ts),
+    ValType = raw_type(Val, Ts),
     sub_sw_list_1(beam_types:subtract(Type, ValType), T, Ts);
 sub_sw_list_1(Type, [], _Ts) ->
     Type.
 
-update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
-    case beam_types:is_boolean_type(get_type(Var, Ts)) of
-        true ->
-            update_successor(S, Ts#{ Var := beam_types:make_atom(BoolValue) }, D);
-        false ->
-            %% The `br` terminator is preceeded by an instruction that
-            %% does not produce a boolean value, such a `new_try_tag`.
-            update_successor(S, Ts, D)
-    end.
-
-update_successor(?BADARG_BLOCK, _Ts, #d{}=D) ->
-    %% We KNOW that no variables are used in the ?BADARG_BLOCK,
+update_successor(?EXCEPTION_BLOCK, _Ts, #d{}=D) ->
+    %% We KNOW that no variables are used in the ?EXCEPTION_BLOCK,
     %% so there is no need to update the type information. That
     %% can be a huge timesaver for huge functions.
     D;
@@ -898,10 +913,11 @@ update_types(#b_set{op=Op,dst=Dst,args=Args}, Ts, Ds) ->
     Ts#{Dst=>T}.
 
 type(phi, Args, Ts, _Ds) ->
-    Types = [get_type(A, Ts) || {A,_} <- Args],
+    Types = [raw_type(A, Ts) || {A,_} <- Args],
     beam_types:join(Types);
 type({bif,Bif}, Args, Ts, _Ds) ->
-    {RetType, _, _} = beam_call_types:types(erlang, Bif, get_types(Args, Ts)),
+    ArgTypes = normalized_types(Args, Ts),
+    {RetType, _, _} = beam_call_types:types(erlang, Bif, ArgTypes),
     RetType;
 type(bs_init, _Args, _Ts, _Ds) ->
     #t_bitstring{};
@@ -914,10 +930,11 @@ type(bs_get_tail, _Args, _Ts, _Ds) ->
     #t_bitstring{};
 type(call, [#b_remote{mod=#b_literal{val=Mod},
                       name=#b_literal{val=Name}}|Args], Ts, _Ds) ->
-    {RetType, _, _} = beam_call_types:types(Mod, Name, get_types(Args, Ts)),
+    ArgTypes = normalized_types(Args, Ts),
+    {RetType, _, _} = beam_call_types:types(Mod, Name, ArgTypes),
     RetType;
 type(get_tuple_element, [Tuple, Offset], Ts, _Ds) ->
-    #t_tuple{size=Size,elements=Es} = get_type(Tuple, Ts),
+    #t_tuple{size=Size,elements=Es} = normalized_type(Tuple, Ts),
     #b_literal{val=N} = Offset,
     true = Size > N, %Assertion.
     beam_types:get_element_type(N + 1, Es);
@@ -933,7 +950,7 @@ type(put_list, _Args, _Ts, _Ds) ->
     cons;
 type(put_tuple, Args, Ts, _Ds) ->
     {Es, _} = foldl(fun(Arg, {Es0, Index}) ->
-                            Type = get_type(Arg, Ts),
+                            Type = raw_type(Arg, Ts),
                             Es = beam_types:set_element_type(Index, Type, Es0),
                             {Es, Index + 1}
                     end, {#{}, 1}, Args),
@@ -941,7 +958,7 @@ type(put_tuple, Args, Ts, _Ds) ->
 type(succeeded, [#b_var{}=Src], Ts, Ds) ->
     case maps:get(Src, Ds) of
         #b_set{op={bif,Bif},args=BifArgs} ->
-            Types = get_types(BifArgs, Ts),
+            Types = normalized_types(BifArgs, Ts),
             case {Bif,Types} of
                 {BoolOp,[T1,T2]} when BoolOp =:= 'and'; BoolOp =:= 'or' ->
                     BothBool = beam_types:is_boolean_type(T1) andalso
@@ -1138,7 +1155,7 @@ simplify_is_record(I, #t_tuple{exact=Exact,
                    {ok, _} -> no;
                    error ->
                        %% Is it at all possible for the tag to match?
-                       case beam_types:meet(get_type(RecTag, Ts), TagType) of
+                       case beam_types:meet(raw_type(RecTag, Ts), TagType) of
                            none -> no;
                            _ -> maybe
                        end
@@ -1168,7 +1185,7 @@ simplify_switch_bool(#b_switch{arg=B,fail=Fail,list=List0}, Ts, Ds) ->
 simplify_not(#b_br{bool=#b_var{}=V,succ=Succ,fail=Fail}=Br0, Ts, Ds) ->
     case Ds of
         #{V:=#b_set{op={bif,'not'},args=[Bool]}} ->
-            case beam_types:is_boolean_type(get_type(Bool, Ts)) of
+            case beam_types:is_boolean_type(raw_type(Bool, Ts)) of
                 true ->
                     Br = Br0#b_br{bool=Bool,succ=Fail,fail=Succ},
                     beam_ssa:normalize(Br);
@@ -1236,16 +1253,18 @@ used_once_last_uses([V|Vs], L, Uses) ->
     end;
 used_once_last_uses([], _, Uses) -> Uses.
 
+normalized_types(Values, Ts) ->
+    [normalized_type(Val, Ts) || Val <- Values].
+
+normalized_type(V, Ts) ->
+    beam_types:normalize(raw_type(V, Ts)).
 
-get_types(Values, Ts) ->
-    [get_type(Val, Ts) || Val <- Values].
--spec get_type(beam_ssa:value(), type_db()) -> type().
+-spec raw_type(beam_ssa:value(), type_db()) -> type().
 
-get_type(#b_var{}=V, Ts) ->
-    #{V:=T} = Ts,
-    T;
-get_type(#b_literal{val=Val}, _Ts) ->
-    beam_types:make_type_from_value(Val).
+raw_type(#b_literal{val=Value}, _Ts) ->
+    beam_types:make_type_from_value(Value);
+raw_type(V, Ts) ->
+    map_get(V, Ts).
 
 %% infer_types(Var, Types, #d{}) -> {SuccTypes,FailTypes}
 %%  Looking at the expression that defines the variable Var, infer
@@ -1268,82 +1287,67 @@ get_type(#b_literal{val=Val}, _Ts) ->
 %%  'cons' would give 'nil' as the only possible type. The result of the
 %%  subtraction for L will be added to FailTypes.
 
-infer_types_br(#b_var{}=V, Ts, #d{ds=Ds}) ->
+infer_types_br(#b_var{}=V, Ts, UsedOnce, #d{ds=Ds}) ->
     #{V:=#b_set{op=Op,args=Args}} = Ds,
-    {PosTypes0, NegTypes0} = infer_type(Op, Args, Ts, Ds),
 
-    %% We must be careful with types inferred from '=:='.
-    %%
-    %% If we have seen L =:= [a], we know that L is 'cons' if the
-    %% comparison succeeds. However, if the comparison fails, L could
-    %% still be 'cons'. Therefore, we must not subtract 'cons' from the
-    %% previous type of L.
-    %%
-    %% However, it is safe to subtract a type inferred from '=:=' if
-    %% it is single-valued, e.g. if it is [] or the atom 'true'.
-
-    EqTypes = infer_eq_type(Op, Args, Ts, Ds),
-    NegTypes1 = [P || {_,T}=P <- EqTypes, beam_types:is_singleton_type(T)],
+    {PosTypes, NegTypes} = infer_type(Op, Args, Ts, Ds),
 
-    PosTypes = EqTypes ++ PosTypes0,
-    SuccTs = meet_types(PosTypes, Ts),
+    SuccTs0 = meet_types(PosTypes, Ts),
+    FailTs0 = subtract_types(NegTypes, Ts),
 
-    NegTypes = NegTypes0 ++ NegTypes1,
-    FailTs = subtract_types(NegTypes, Ts),
+    case UsedOnce of
+        true ->
+            %% The branch variable is defined in this block and is only
+            %% referenced by this terminator. Therefore, there is no need to
+            %% include it in the type database passed on to the successors of
+            %% of this block.
+            SuccTs = ts_remove_var(V, SuccTs0),
+            FailTs = ts_remove_var(V, FailTs0),
+            {SuccTs, FailTs};
+        false ->
+            SuccTs = infer_br_value(V, true, SuccTs0),
+            FailTs = infer_br_value(V, false, FailTs0),
+            {SuccTs, FailTs}
+    end.
 
-    {SuccTs,FailTs}.
+infer_br_value(_V, _Bool, none) ->
+    none;
+infer_br_value(V, Bool, NewTs) ->
+    #{ V := T } = NewTs,
+    case beam_types:is_boolean_type(T) of
+        true ->
+            NewTs#{ V := beam_types:make_atom(Bool) };
+        false ->
+            %% V is a try/catch tag or similar, leave it alone.
+            NewTs
+    end.
 
-infer_types_switch(V, Lit, Ts, #d{ds=Ds}) ->
-    Types = infer_eq_type({bif,'=:='}, [V, Lit], Ts, Ds),
-    meet_types(Types, Ts).
+infer_types_switch(V, Lit, Ts0, UsedOnce, #d{ds=Ds}) ->
+    {PosTypes, _} = infer_type({bif,'=:='}, [V, Lit], Ts0, Ds),
+    Ts = meet_types(PosTypes, Ts0),
+    case UsedOnce of
+        true -> ts_remove_var(V, Ts);
+        false -> Ts
+    end.
 
-infer_eq_type({bif,'=:='}, [#b_var{}=Src,#b_literal{}=Lit], Ts, Ds) ->
-    Def = maps:get(Src, Ds),
-    Type = get_type(Lit, Ts),
-    [{Src,Type} | infer_eq_lit(Def, Lit)];
-infer_eq_type({bif,'=:='}, [#b_var{}=Arg0,#b_var{}=Arg1], Ts, _Ds) ->
-    %% As an example, assume that L1 is known to be 'list', and L2 is
-    %% known to be 'cons'. Then if 'L1 =:= L2' evaluates to 'true', it can
-    %% be inferred that L1 is 'cons' (the meet of 'cons' and 'list').
-    Type0 = get_type(Arg0, Ts),
-    Type1 = get_type(Arg1, Ts),
-    Type = beam_types:meet(Type0, Type1),
-    [{V,MeetType} ||
-        {V,OrigType,MeetType} <-
-            [{Arg0,Type0,Type},{Arg1,Type1,Type}],
-        OrigType =/= MeetType];
-infer_eq_type(_Op, _Args, _Ts, _Ds) ->
-    [].
-
-infer_eq_lit(#b_set{op={bif,tuple_size},args=[#b_var{}=Tuple]},
-             #b_literal{val=Size}) when is_integer(Size) ->
-    [{Tuple,#t_tuple{exact=true,size=Size}}];
-infer_eq_lit(#b_set{op=get_tuple_element,
-                    args=[#b_var{}=Tuple,#b_literal{val=N}]},
-             #b_literal{}=Lit) ->
-    Index = N + 1,
-    Es = beam_types:set_element_type(Index, get_type(Lit, #{}), #{}),
-    [{Tuple,#t_tuple{size=Index,elements=Es}}];
-infer_eq_lit(_, _) -> [].
+ts_remove_var(_V, none) -> none;
+ts_remove_var(V, Ts) -> maps:remove(V, Ts).
 
 infer_type(succeeded, [#b_var{}=Src], Ts, Ds) ->
     #b_set{op=Op,args=Args} = maps:get(Src, Ds),
-    infer_type(Op, Args, Ts, Ds);
-infer_type(bs_start_match, [#b_var{}=Bin], _Ts, _Ds) ->
-    T = {Bin,#t_bitstring{}},
-    {[T], [T]};
-infer_type(is_nonempty_list, [#b_var{}=Src], _Ts, _Ds) ->
-    T = {Src,cons},
-    {[T], [T]};
+    infer_success_type(Op, Args, Ts, Ds);
+
+%% Type tests are handled separately from other BIFs as we're inferring types
+%% based on their result, so we know that subtraction is safe even if we're
+%% not branching on 'succeeded'.
 infer_type(is_tagged_tuple, [#b_var{}=Src,#b_literal{val=Size},
                              #b_literal{}=Tag], _Ts, _Ds) ->
-    Es = beam_types:set_element_type(1, get_type(Tag, #{}), #{}),
+    Es = beam_types:set_element_type(1, raw_type(Tag, #{}), #{}),
     T = {Src,#t_tuple{exact=true,size=Size,elements=Es}},
     {[T], [T]};
-
-%% Type tests are handled separately from other BIFs as we're inferring types
-%% based on their result rather than whether they succeeded, so we know that
-%% subtraction is always safe.
+infer_type(is_nonempty_list, [#b_var{}=Src], _Ts, _Ds) ->
+    T = {Src,cons},
+    {[T], [T]};
 infer_type({bif,is_atom}, [Arg], _Ts, _Ds) ->
     T = {Arg, #t_atom{}},
     {[T], [T]};
@@ -1374,9 +1378,43 @@ infer_type({bif,is_number}, [Arg], _Ts, _Ds) ->
 infer_type({bif,is_tuple}, [Arg], _Ts, _Ds) ->
     T = {Arg, #t_tuple{}},
     {[T], [T]};
+infer_type({bif,'=:='}, [#b_var{}=LHS,#b_var{}=RHS], Ts, _Ds) ->
+    %% As an example, assume that L1 is known to be 'list', and L2 is
+    %% known to be 'cons'. Then if 'L1 =:= L2' evaluates to 'true', it can
+    %% be inferred that L1 is 'cons' (the meet of 'cons' and 'list').
+    LType = raw_type(LHS, Ts),
+    RType = raw_type(RHS, Ts),
+    Type = beam_types:meet(LType, RType),
+
+    PosTypes = [{V,Type} || {V, OrigType} <- [{LHS, LType}, {RHS, RType}],
+                            OrigType =/= Type],
+
+    %% We must be careful with types inferred from '=:='.
+    %%
+    %% If we have seen L =:= [a], we know that L is 'cons' if the
+    %% comparison succeeds. However, if the comparison fails, L could
+    %% still be 'cons'. Therefore, we must not subtract 'cons' from the
+    %% previous type of L.
+    %%
+    %% However, it is safe to subtract a type inferred from '=:=' if
+    %% it is single-valued, e.g. if it is [] or the atom 'true'.
+    NegTypes = case beam_types:is_singleton_type(Type) of
+                   true -> PosTypes;
+                   false -> []
+               end,
 
-infer_type({bif,Op}, Args, Ts, _Ds) ->
-    ArgTypes = get_types(Args, Ts),
+    {PosTypes, NegTypes};
+infer_type({bif,'=:='}, [#b_var{}=Src,#b_literal{}=Lit], Ts, Ds) ->
+    Def = maps:get(Src, Ds),
+    Type = raw_type(Lit, Ts),
+    EqLitTypes = infer_eq_lit(Def, Lit),
+    PosTypes = [{Src,Type} | EqLitTypes],
+    {PosTypes, EqLitTypes};
+infer_type(_Op, _Args, _Ts, _Ds) ->
+    {[], []}.
+
+infer_success_type({bif,Op}, Args, Ts, _Ds) ->
+    ArgTypes = normalized_types(Args, Ts),
 
     {_, PosTypes0, CanSubtract} = beam_call_types:types(erlang, Op, ArgTypes),
     PosTypes = [T || {#b_var{},_}=T <- zip(Args, PosTypes0)],
@@ -1385,10 +1423,24 @@ infer_type({bif,Op}, Args, Ts, _Ds) ->
         true -> {PosTypes, PosTypes};
         false -> {PosTypes, []}
     end;
-
-infer_type(_Op, _Args, _Ts, _Ds) ->
+infer_success_type(bs_start_match, [#b_var{}=Bin], _Ts, _Ds) ->
+    T = {Bin,#t_bitstring{}},
+    {[T], [T]};
+infer_success_type(_Op, _Args, _Ts, _Ds) ->
     {[], []}.
 
+infer_eq_lit(#b_set{op={bif,tuple_size},args=[#b_var{}=Tuple]},
+             #b_literal{val=Size}) when is_integer(Size) ->
+    [{Tuple,#t_tuple{exact=true,size=Size}}];
+infer_eq_lit(#b_set{op=get_tuple_element,
+                    args=[#b_var{}=Tuple,#b_literal{val=N}]},
+             #b_literal{}=Lit) ->
+    Index = N + 1,
+    Es = beam_types:set_element_type(Index, raw_type(Lit, #{}), #{}),
+    [{Tuple,#t_tuple{size=Index,elements=Es}}];
+infer_eq_lit(_, _) ->
+    [].
+
 join_types(Ts0, Ts1) ->
     if
         map_size(Ts0) < map_size(Ts1) ->
@@ -1417,6 +1469,7 @@ join_types_1([], Ts0, Ts1) ->
 meet_types([{V,T0}|Vs], Ts) ->
     #{V:=T1} = Ts,
     case beam_types:meet(T0, T1) of
+        none -> none;
         T1 -> meet_types(Vs, Ts);
         T -> meet_types(Vs, Ts#{V:=T})
     end;
@@ -1425,7 +1478,9 @@ meet_types([], Ts) -> Ts.
 subtract_types([{V,T0}|Vs], Ts) ->
     #{V:=T1} = Ts,
     case beam_types:subtract(T1, T0) of
+        none -> none;
         T1 -> subtract_types(Vs, Ts);
         T -> subtract_types(Vs, Ts#{V:=T})
     end;
 subtract_types([], Ts) -> Ts.
+
diff --git a/lib/compiler/src/beam_types.erl b/lib/compiler/src/beam_types.erl
index 07d3c3d3f2..821ccd31bb 100644
--- a/lib/compiler/src/beam_types.erl
+++ b/lib/compiler/src/beam_types.erl
@@ -22,14 +22,14 @@
 
 -include("beam_types.hrl").
 
--import(lists, [foldl/3]).
+-import(lists, [foldl/3, reverse/1, reverse/2]).
 
 -export([meet/1, meet/2, join/1, join/2, subtract/2]).
 
 -export([get_singleton_value/1,
-         get_tuple_size/1,
          is_singleton_type/1,
-         is_boolean_type/1]).
+         is_boolean_type/1,
+         normalize/1]).
 
 -export([get_element_type/2, set_element_type/3]).
 
@@ -38,210 +38,276 @@
 -export([make_atom/1,
          make_boolean/0,
          make_integer/1,
-         make_integer/2,
-         make_tuple/2,
-         make_tuple/3]).
+         make_integer/2]).
 
-%% Return the "join" of Type1 and Type2. The join is a more general
-%% type than Type1 and Type2. For example:
+-define(IS_LIST_TYPE(N),
+        N =:= list orelse
+        N =:= cons orelse
+        N =:= nil).
+
+-define(IS_NUMBER_TYPE(N),
+        N =:= number orelse
+        N =:= float orelse
+        is_record(N, t_integer)).
+
+-define(TUPLE_SET_LIMIT, 20).
+
+%% Folds meet/2 over a list.
+
+-spec meet([type()]) -> type().
+
+meet([T1, T2 | Ts]) ->
+    meet([meet(T1, T2) | Ts]);
+meet([T]) -> T.
+
+%% Return the "meet" of Type1 and Type2, which is more general than Type1 and
+%% Type2. This is identical to glb/2 but can operate on and produce unions.
 %%
-%%    join(#t_integer{elements=any}, #t_integer=elements={0,3}}) ->
-%%         #t_integer{}
+%%    A = #t_union{list=nil, number=[number], other=[#t_map{}]}
+%%    B = #t_union{number=[#t_integer{}], other=[#t_map{}]}
+%%
+%%    meet(A, B) ->
+%%         #t_union{number=[#t_integer{}], other=[#t_map{}]}
 %%
-%% The join for two different types result in 'any', which is
-%% the top element for our type lattice:
+%% The meet of two different types result in 'none', which is the bottom
+%% element for our type lattice:
 %%
-%%    join(#t_integer{}, #t_map{}) -> any
+%%    meet(#t_integer{}, #t_map{}) -> none
 
--spec join(type(), type()) -> type().
+-spec meet(type(), type()) -> type().
 
-join(T, T) ->
+meet(T, T) ->
     verified_type(T);
-join(none, T) ->
+meet(any, T) ->
     verified_type(T);
-join(T, none) ->
+meet(T, any) ->
     verified_type(T);
-join(any, _) -> any;
-join(_, any) -> any;
-join(#t_atom{elements=[_|_]=Set1}, #t_atom{elements=[_|_]=Set2}) ->
-    Set = ordsets:union(Set1, Set2),
-    case ordsets:size(Set) of
-        Size when Size =< ?ATOM_SET_SIZE ->
-            #t_atom{elements=Set};
-        _Size ->
-            #t_atom{elements=any}
+meet(#t_union{}=A, B) ->
+    meet_unions(A, B);
+meet(A, #t_union{}=B) ->
+    meet_unions(B, A);
+meet(A, B) ->
+    glb(A, B).
+
+meet_unions(#t_union{atom=AtomA,list=ListA,number=NumberA,
+                     tuple_set=TSetA,other=OtherA},
+            #t_union{atom=AtomB,list=ListB,number=NumberB,
+                     tuple_set=TSetB,other=OtherB}) ->
+    Union = #t_union{atom=glb(AtomA, AtomB),
+                     list=glb(ListA, ListB),
+                     number=glb(NumberA, NumberB),
+                     tuple_set=meet_tuple_sets(TSetA, TSetB),
+                     other=glb(OtherA, OtherB)},
+    shrink_union(Union);
+meet_unions(#t_union{atom=AtomA}, #t_atom{}=B) ->
+    case glb(AtomA, B) of
+        none -> none;
+        Atom -> Atom
     end;
-join(#t_atom{elements=any}=T, #t_atom{elements=[_|_]}) -> T;
-join(#t_atom{elements=[_|_]}, #t_atom{elements=any}=T) -> T;
-join(#t_bitstring{unit=U1}, #t_bitstring{unit=U2}) ->
-    #t_bitstring{unit=gcd(U1, U2)};
-join(#t_fun{}, #t_fun{}) ->
-    #t_fun{};
-join(#t_integer{elements={MinA,MaxA}},
-     #t_integer{elements={MinB,MaxB}}) ->
-    #t_integer{elements={min(MinA,MinB),max(MaxA,MaxB)}};
-join(#t_bs_context{slots=SlotsA,valid=ValidA},
-     #t_bs_context{slots=SlotsB,valid=ValidB}) ->
-    #t_bs_context{slots=min(SlotsA, SlotsB),
-                  valid=ValidA band ValidB};
-join(#t_integer{}, #t_integer{}) -> #t_integer{};
-join(list, cons) -> list;
-join(cons, list) -> list;
-join(nil, cons) -> list;
-join(cons, nil) -> list;
-join(nil, list) -> list;
-join(list, nil) -> list;
-join(#t_integer{}, float) -> number;
-join(float, #t_integer{}) -> number;
-join(#t_integer{}, number) -> number;
-join(number, #t_integer{}) -> number;
-join(float, number) -> number;
-join(number, float) -> number;
-join(#t_tuple{size=Sz,exact=ExactA,elements=EsA},
-     #t_tuple{size=Sz,exact=ExactB,elements=EsB}) ->
-    Exact = ExactA and ExactB,
-    Es = join_tuple_elements(Sz, EsA, EsB),
-    #t_tuple{size=Sz,exact=Exact,elements=Es};
-join(#t_tuple{size=SzA,elements=EsA}, #t_tuple{size=SzB,elements=EsB}) ->
-    Sz = min(SzA, SzB),
-    Es = join_tuple_elements(Sz, EsA, EsB),
-    #t_tuple{size=Sz,elements=Es};
-join(_T1, _T2) ->
-    %%io:format("~p ~p\n", [_T1,_T2]),
-    any.
+meet_unions(#t_union{number=NumberA}, B) when ?IS_NUMBER_TYPE(B) ->
+    case glb(NumberA, B) of
+        none -> none;
+        Number -> Number
+    end;
+meet_unions(#t_union{list=ListA}, B) when ?IS_LIST_TYPE(B) ->
+    case glb(ListA, B) of
+        none -> none;
+        List -> List
+    end;
+meet_unions(#t_union{tuple_set=Tuples}, #t_tuple{}=B) ->
+    Set = meet_tuple_sets(Tuples, new_tuple_set(B)),
+    shrink_union(#t_union{tuple_set=Set});
+meet_unions(#t_union{other=OtherA}, OtherB) ->
+    case glb(OtherA, OtherB) of
+        none -> none;
+        Other -> Other
+    end.
 
-join_tuple_elements(MinSize, EsA, EsB) ->
-    Es0 = join_elements(EsA, EsB),
-    maps:filter(fun(Index, _Type) -> Index =< MinSize end, Es0).
+meet_tuple_sets(none, _) ->
+    none;
+meet_tuple_sets(_, none) ->
+    none;
+meet_tuple_sets(#t_tuple{}=A, #t_tuple{}=B) ->
+    new_tuple_set(glb(A, B));
+meet_tuple_sets(#t_tuple{}=Tuple, Records) ->
+    mts_tuple(Records, Tuple, []);
+meet_tuple_sets(Records, #t_tuple{}=Tuple) ->
+    meet_tuple_sets(Tuple, Records);
+meet_tuple_sets(RecordsA, RecordsB) ->
+    mts_records(RecordsA, RecordsB).
+
+mts_tuple([{Key, Type} | Records], Tuple, Acc) ->
+    case glb(Type, Tuple) of
+        none -> mts_tuple(Records, Tuple, Acc);
+        T -> mts_tuple(Records, Tuple, [{Key, T} | Acc])
+    end;
+mts_tuple([], _Tuple, [_|_]=Acc) ->
+    reverse(Acc);
+mts_tuple([], _Tuple, []) ->
+    none.
 
-join_elements(Es1, Es2) ->
-    Keys = if
-               map_size(Es1) =< map_size(Es2) -> maps:keys(Es1);
-               map_size(Es1) > map_size(Es2) -> maps:keys(Es2)
-           end,
-    join_elements_1(Keys, Es1, Es2, #{}).
+mts_records(RecordsA, RecordsB) ->
+    mts_records(RecordsA, RecordsB, []).
 
-join_elements_1([Key | Keys], Es1, Es2, Acc0) ->
-    case {Es1, Es2} of
-        {#{ Key := Type1 }, #{ Key := Type2 }} ->
-            Acc = set_element_type(Key, join(Type1, Type2), Acc0),
-            join_elements_1(Keys, Es1, Es2, Acc);
-        {#{}, #{}} ->
-            join_elements_1(Keys, Es1, Es2, Acc0)
+mts_records([{Key, A} | RsA], [{Key, B} | RsB], Acc) ->
+    case glb(A, B) of
+        none -> mts_records(RsA, RsB, Acc);
+        T -> mts_records(RsA, RsB, [{Key, T} | Acc])
     end;
-join_elements_1([], _Es1, _Es2, Acc) ->
-    Acc.
+mts_records([{KeyA, _} | _ ]=RsA, [{KeyB, _} | RsB], Acc) when KeyA > KeyB ->
+    mts_records(RsA, RsB, Acc);
+mts_records([{KeyA, _} | RsA], [{KeyB, _} | _] = RsB, Acc) when KeyA < KeyB ->
+    mts_records(RsA, RsB, Acc);
+mts_records(_RsA, [], [_|_]=Acc) ->
+    reverse(Acc);
+mts_records([], _RsB, [_|_]=Acc) ->
+    reverse(Acc);
+mts_records(_RsA, _RsB, []) ->
+    none.
 
-gcd(A, B) ->
-    case A rem B of
-        0 -> B;
-        X -> gcd(B, X)
-    end.
+%% Folds join/2 over a list.
 
-%% Joins all the given types into a single type.
 -spec join([type()]) -> type().
 
-join([T1,T2|Ts]) ->
-    join([join(T1, T2)|Ts]);
+join([T1, T2| Ts]) ->
+    join([join(T1, T2) | Ts]);
 join([T]) -> T.
 
-%% Return the "meet" of Type1 and Type2. The meet is a narrower
-%% type than Type1 and Type2. For example:
-%%
-%%    meet(#t_integer{elements=any}, #t_integer{elements={0,3}}) ->
-%%         #t_integer{elements={0,3}}
+%% Return the "join" of Type1 and Type2, which is more general than Type1 and
+%% Type2. This is identical to lub/2 but can operate on and produce unions.
 %%
-%% The meet for two different types result in 'none', which is
-%% the bottom element for our type lattice:
-%%
-%%    meet(#t_integer{}, #t_map{}) -> none
+%%    join(#t_integer{}, #t_map{}) -> #t_union{number=[#t_integer{}],
+%%                                             other=[#t_map{}]}
 
--spec meet(type(), type()) -> type().
+-spec join(type(), type()) -> type().
 
-meet(T, T) ->
-    verified_type(T);
-meet(#t_atom{elements=[_|_]=Set1}, #t_atom{elements=[_|_]=Set2}) ->
-    case ordsets:intersection(Set1, Set2) of
-        [] ->
-            none;
-        [_|_]=Set ->
-            #t_atom{elements=Set}
+join(T, T) -> T;
+join(_T, any) -> any;
+join(any, _T) -> any;
+join(T, none) -> T;
+join(none, T) -> T;
+
+join(#t_union{}=A, B) ->
+    join_unions(A, B);
+join(A, #t_union{}=B) ->
+    join_unions(B, A);
+
+%% Union creation...
+join(#t_atom{}=A, #t_atom{}=B) ->
+    lub(A, B);
+join(#t_atom{}=A, B) when ?IS_LIST_TYPE(B) ->
+    #t_union{atom=A,list=B};
+join(#t_atom{}=A, B) when ?IS_NUMBER_TYPE(B) ->
+    #t_union{atom=A,number=B};
+join(#t_atom{}=A, #t_tuple{}=B) ->
+    #t_union{atom=A,tuple_set=new_tuple_set(B)};
+join(#t_atom{}=A, B) ->
+    #t_union{atom=A,other=B};
+join(A, #t_atom{}=B) ->
+    join(B, A);
+
+join(A, B) when ?IS_LIST_TYPE(A), ?IS_LIST_TYPE(B) ->
+    lub(A, B);
+join(A, B) when ?IS_LIST_TYPE(A), ?IS_NUMBER_TYPE(B) ->
+    #t_union{list=A,number=B};
+join(A, #t_tuple{}=B) when ?IS_LIST_TYPE(A) ->
+    #t_union{list=A,tuple_set=new_tuple_set(B)};
+join(A, B) when ?IS_LIST_TYPE(A) ->
+    #t_union{list=A,other=B};
+join(A, B) when ?IS_LIST_TYPE(B) ->
+    join(B, A);
+
+join(A, B) when ?IS_NUMBER_TYPE(A), ?IS_NUMBER_TYPE(B) ->
+    lub(A, B);
+join(A, #t_tuple{}=B) when ?IS_NUMBER_TYPE(A) ->
+    #t_union{number=A,tuple_set=new_tuple_set(B)};
+join(A, B) when ?IS_NUMBER_TYPE(A) ->
+    #t_union{number=A,other=B};
+join(A, B) when ?IS_NUMBER_TYPE(B) ->
+    join(B, A);
+
+join(#t_tuple{}=A, #t_tuple{}=B) ->
+    case {record_key(A), record_key(B)} of
+        {Same, Same} ->
+            lub(A, B);
+        {none, _Key} ->
+            lub(A, B);
+        {_Key, none} ->
+            lub(A, B);
+        {KeyA, KeyB} when KeyA < KeyB ->
+            #t_union{tuple_set=[{KeyA, A}, {KeyB, B}]};
+        {KeyA, KeyB} when KeyA > KeyB ->
+            #t_union{tuple_set=[{KeyB, B}, {KeyA, A}]}
     end;
-meet(#t_atom{elements=[_|_]}=T, #t_atom{elements=any}) ->
-    T;
-meet(#t_atom{elements=any}, #t_atom{elements=[_|_]}=T) ->
-    T;
-meet(#t_fun{arity=any}, #t_fun{}=T) ->
-    T;
-meet(#t_fun{}=T, #t_fun{arity=any}) ->
-    T;
-meet(#t_integer{elements={_,_}}=T, #t_integer{elements=any}) ->
-    T;
-meet(#t_integer{elements=any}, #t_integer{elements={_,_}}=T) ->
-    T;
-meet(#t_integer{elements={MinA,MaxA}}, #t_integer{elements={MinB,MaxB}})
-  when MinA >= MinB, MaxA =< MaxB;
-       MinB >= MinA, MaxB =< MaxA ->
-    #t_integer{elements={max(MinA, MinB),min(MaxA, MaxB)}};
-meet(#t_integer{}=T, number) -> T;
-meet(float=T, number) -> T;
-meet(number, #t_integer{}=T) -> T;
-meet(number, float=T) -> T;
-meet(list, cons) -> cons;
-meet(list, nil) -> nil;
-meet(cons, list) -> cons;
-meet(nil, list) -> nil;
-meet(#t_tuple{}=T1, #t_tuple{}=T2) ->
-    meet_tuples(T1, T2);
-meet(#t_bitstring{unit=U1}, #t_bitstring{unit=U2}) ->
-    #t_bitstring{unit=U1 * U2 div gcd(U1, U2)};
-meet(any, T) ->
-    verified_type(T);
-meet(T, any) ->
-    verified_type(T);
-meet(_, _) ->
-    %% Inconsistent types. There will be an exception at runtime.
-    none.
-
-meet_tuples(#t_tuple{size=Sz1,exact=true},
-            #t_tuple{size=Sz2,exact=true}) when Sz1 =/= Sz2 ->
-    none;
-meet_tuples(#t_tuple{size=Sz1,exact=Ex1,elements=Es1},
-            #t_tuple{size=Sz2,exact=Ex2,elements=Es2}) ->
-    Size = max(Sz1, Sz2),
-    Exact = Ex1 or Ex2,
-    case meet_elements(Es1, Es2) of
-        none ->
-            none;
-        Es ->
-            #t_tuple{size=Size,exact=Exact,elements=Es}
+join(#t_tuple{}=A, B) ->
+    %% All other combinations have been tried already, so B must be 'other'
+    #t_union{tuple_set=new_tuple_set(A),other=B};
+join(A, #t_tuple{}=B) ->
+    join(B, A);
+
+join(A, B) ->
+    lub(A, B).
+
+join_unions(#t_union{atom=AtomA,list=ListA,number=NumberA,
+                     tuple_set=TSetA,other=OtherA},
+            #t_union{atom=AtomB,list=ListB,number=NumberB,
+                     tuple_set=TSetB,other=OtherB}) ->
+    Union = #t_union{atom=lub(AtomA, AtomB),
+                     list=lub(ListA, ListB),
+                     number=lub(NumberA, NumberB),
+                     tuple_set=join_tuple_sets(TSetA, TSetB),
+                     other=lub(OtherA, OtherB)},
+    shrink_union(Union);
+join_unions(#t_union{atom=AtomA}=A, #t_atom{}=B) ->
+    A#t_union{atom=lub(AtomA, B)};
+join_unions(#t_union{list=ListA}=A, B) when ?IS_LIST_TYPE(B) ->
+    A#t_union{list=lub(ListA, B)};
+join_unions(#t_union{number=NumberA}=A, B) when ?IS_NUMBER_TYPE(B) ->
+    A#t_union{number=lub(NumberA, B)};
+join_unions(#t_union{tuple_set=TSetA}=A, #t_tuple{}=B) ->
+    Set = join_tuple_sets(TSetA, new_tuple_set(B)),
+    shrink_union(A#t_union{tuple_set=Set});
+join_unions(#t_union{other=OtherA}=A, B) ->
+    case lub(OtherA, B) of
+        any -> any;
+        T -> A#t_union{other=T}
     end.
 
-meet_elements(Es1, Es2) ->
-    Keys = maps:keys(Es1) ++ maps:keys(Es2),
-    meet_elements_1(Keys, Es1, Es2, #{}).
-
-meet_elements_1([Key | Keys], Es1, Es2, Acc) ->
-    case {Es1, Es2} of
-        {#{ Key := Type1 }, #{ Key := Type2 }} ->
-            case meet(Type1, Type2) of
-                none -> none;
-                Type -> meet_elements_1(Keys, Es1, Es2, Acc#{ Key => Type })
-            end;
-        {#{ Key := Type1 }, _} ->
-            meet_elements_1(Keys, Es1, Es2, Acc#{ Key => Type1 });
-        {_, #{ Key := Type2 }} ->
-            meet_elements_1(Keys, Es1, Es2, Acc#{ Key => Type2 })
-    end;
-meet_elements_1([], _Es1, _Es2, Acc) ->
+join_tuple_sets(A, none) ->
+    A;
+join_tuple_sets(none, B) ->
+    B;
+join_tuple_sets(#t_tuple{}=A, #t_tuple{}=B) ->
+    lub(A, B);
+join_tuple_sets(#t_tuple{}=Tuple, Records) ->
+    jts_tuple(Records, Tuple);
+join_tuple_sets(Records, #t_tuple{}=Tuple) ->
+    join_tuple_sets(Tuple, Records);
+join_tuple_sets(RecordsA, RecordsB) ->
+    jts_records(RecordsA, RecordsB).
+
+jts_tuple([{_Key, Tuple} | Records], Acc) ->
+    jts_tuple(Records, lub(Tuple, Acc));
+jts_tuple([], Acc) ->
     Acc.
 
-%% Meets all the given types into a single type.
--spec meet([type()]) -> type().
-
-meet([T1,T2|Ts]) ->
-    meet([meet(T1, T2)|Ts]);
-meet([T]) -> T.
+jts_records(RsA, RsB) ->
+    jts_records(RsA, RsB, 0, []).
+
+jts_records(RsA, RsB, N, Acc) when N > ?TUPLE_SET_LIMIT ->
+    A = normalize_tuple_set(RsA, none),
+    B = normalize_tuple_set(RsB, A),
+    #t_tuple{} = normalize_tuple_set(Acc, B);
+jts_records([{Key, A} | RsA], [{Key, B} | RsB], N, Acc) ->
+    jts_records(RsA, RsB, N + 1, [{Key, lub(A, B)} | Acc]);
+jts_records([{KeyA, _} | _]=RsA, [{KeyB, B} | RsB], N, Acc) when KeyA > KeyB ->
+    jts_records(RsA, RsB, N + 1, [{KeyB, B} | Acc]);
+jts_records([{KeyA, A} | RsA], [{KeyB, _} | _] = RsB, N, Acc) when KeyA < KeyB ->
+    jts_records(RsA, RsB, N + 1, [{KeyA, A} | Acc]);
+jts_records([], RsB, _N, Acc) ->
+    reverse(Acc, RsB);
+jts_records(RsA, [], _N, Acc) ->
+    reverse(Acc, RsA).
 
 %% Subtract Type2 from Type1. Example:
 %%    subtract(list, cons) -> nil
@@ -257,38 +323,28 @@ subtract(number, float) -> #t_integer{};
 subtract(number, #t_integer{elements=any}) -> float;
 subtract(list, cons) -> nil;
 subtract(list, nil) -> cons;
-subtract(T, _) -> T.
-
-%% Verifies that the given type is well-formed.
 
--spec verified_type(T) -> T when
-      T :: type().
+subtract(#t_union{atom=Atom}=A, #t_atom{}=B)->
+    shrink_union(A#t_union{atom=subtract(Atom, B)});
+subtract(#t_union{number=Number}=A, B) when ?IS_NUMBER_TYPE(B) ->
+    shrink_union(A#t_union{number=subtract(Number, B)});
+subtract(#t_union{list=List}=A, B) when ?IS_LIST_TYPE(B) ->
+    shrink_union(A#t_union{list=subtract(List, B)});
+subtract(#t_union{tuple_set=[_|_]=Records0}=A, #t_tuple{}=B) ->
+    %% Filter out all records that are strictly more specific than B.
+    NewSet = case [{Key, T} || {Key, T} <- Records0, meet(T, B) =/= T] of
+                 [_|_]=Records -> Records;
+                 [] -> none
+             end,
+    shrink_union(A#t_union{tuple_set=NewSet});
+subtract(#t_union{tuple_set=#t_tuple{}=Tuple}=A, #t_tuple{}=B) ->
+    %% Exclude Tuple if it's strictly more specific than B.
+    case meet(Tuple, B) of
+        Tuple -> shrink_union(A#t_union{tuple_set=none});
+        _ -> A
+    end;
 
-verified_type(any=T) -> T;
-verified_type(none=T) -> T;
-verified_type(#t_atom{elements=any}=T) -> T;
-verified_type(#t_atom{elements=[_|_]}=T) -> T;
-verified_type(#t_bitstring{unit=U}=T) when is_integer(U), U >= 1 -> T;
-verified_type(#t_bs_context{}=T) -> T;
-verified_type(#t_fun{arity=Arity}=T) when Arity =:= any; is_integer(Arity) -> T;
-verified_type(#t_integer{elements=any}=T) -> T;
-verified_type(#t_integer{elements={Min,Max}}=T)
-  when is_integer(Min), is_integer(Max), Min =< Max -> T;
-verified_type(list=T) -> T;
-verified_type(#t_map{}=T) -> T;
-verified_type(nil=T) -> T;
-verified_type(cons=T) -> T;
-verified_type(number=T) -> T;
-verified_type(#t_tuple{size=Size,elements=Es}=T) ->
-    %% All known elements must have a valid index and type. 'any' is prohibited
-    %% since it's implicit and should never be present in the map.
-    maps:fold(fun(Index, Element, _) when is_integer(Index),
-                                          1 =< Index, Index =< Size,
-                                          Element =/= any, Element =/= none ->
-                      verified_type(Element)
-              end, [], Es),
-    T;
-verified_type(float=T) -> T.
+subtract(T, _) -> T.
 
 %%%
 %%% Type operators
@@ -306,23 +362,15 @@ get_singleton_value(nil) ->
 get_singleton_value(_) ->
     error.
 
--spec get_tuple_size(Type) -> Result when
-      Type :: type(),
-      Result :: none | {at_least, Size} | {exact, Size},
-      Size :: non_neg_integer().
-get_tuple_size(#t_tuple{size=Size,exact=false}) ->
-    {at_least,Size};
-get_tuple_size(#t_tuple{size=Size,exact=true}) ->
-    {exact,Size};
-get_tuple_size(_) ->
-    none.
-
 -spec is_boolean_type(type()) -> boolean().
 is_boolean_type(#t_atom{elements=[F,T]}) ->
     F =:= false andalso T =:= true;
 is_boolean_type(#t_atom{elements=[B]}) ->
     is_boolean(B);
-is_boolean_type(_) -> false.
+is_boolean_type(#t_union{}=T) ->
+    is_boolean_type(normalize(T));
+is_boolean_type(_) ->
+    false.
 
 -spec is_singleton_type(type()) -> boolean().
 is_singleton_type(Type) ->
@@ -348,6 +396,23 @@ get_element_type(Index, Es) ->
         #{} -> any
     end.
 
+-spec normalize(type()) -> normal_type().
+normalize(#t_union{atom=Atom,list=List,number=Number,
+                   tuple_set=Tuples,other=Other}) ->
+    A = lub(Atom, List),
+    B = lub(A, Number),
+    C = lub(B, Other),
+    normalize_tuple_set(Tuples, C);
+normalize(T) ->
+    verified_normal_type(T).
+
+normalize_tuple_set([{_, A} | Records], B) ->
+    normalize_tuple_set(Records, lub(A, B));
+normalize_tuple_set([], B) ->
+    B;
+normalize_tuple_set(A, B) ->
+    lub(A, B).
+
 %%%
 %%% Type constructors
 %%%
@@ -395,17 +460,319 @@ make_integer(Int) when is_integer(Int) ->
 make_integer(Min, Max) when is_integer(Min), is_integer(Max), Min =< Max ->
     #t_integer{elements={Min,Max}}.
 
--spec make_tuple(Size, Exact) -> type() when
-      Size :: non_neg_integer(),
-      Exact :: boolean().
-make_tuple(Size, Exact) ->
-    make_tuple(Size, Exact, #{}).
-
--spec make_tuple(Size, Exact, Elements) -> type() when
-      Size :: non_neg_integer(),
-      Exact :: boolean(),
-      Elements :: #{ non_neg_integer() => type() }.
-make_tuple(Size, Exact, Elements) ->
-    #t_tuple{size=Size,
-             exact=Exact,
-             elements=Elements}.
+%%%
+%%% Helpers
+%%%
+
+%% Return the greatest lower bound of the types Type1 and Type2. The GLB is a
+%% more specific type than Type1 and Type2, and is always a normal type.
+%%
+%%    glb(#t_integer{elements=any}, #t_integer{elements={0,3}}) ->
+%%         #t_integer{elements={0,3}}
+%%
+%% The GLB of two different types result in 'none', which is the bottom
+%% element for our type lattice:
+%%
+%%    glb(#t_integer{}, #t_map{}) -> none
+
+-spec glb(normal_type(), normal_type()) -> normal_type().
+
+glb(T, T) ->
+    verified_normal_type(T);
+glb(any, T) ->
+    verified_normal_type(T);
+glb(T, any) ->
+    verified_normal_type(T);
+glb(#t_atom{elements=[_|_]=Set1}, #t_atom{elements=[_|_]=Set2}) ->
+    case ordsets:intersection(Set1, Set2) of
+        [] ->
+            none;
+        [_|_]=Set ->
+            #t_atom{elements=Set}
+    end;
+glb(#t_atom{elements=[_|_]}=T, #t_atom{elements=any}) ->
+    T;
+glb(#t_atom{elements=any}, #t_atom{elements=[_|_]}=T) ->
+    T;
+glb(#t_bs_context{slots=SlotCountA,valid=ValidSlotsA},
+    #t_bs_context{slots=SlotCountB,valid=ValidSlotsB}) ->
+    CommonSlotMask = (1 bsl min(SlotCountA, SlotCountB)) - 1,
+    CommonSlotsA = ValidSlotsA band CommonSlotMask,
+    CommonSlotsB = ValidSlotsB band CommonSlotMask,
+    if
+        CommonSlotsA =:= CommonSlotsB ->
+            #t_bs_context{slots=max(SlotCountA, SlotCountB),
+                          valid=ValidSlotsA bor ValidSlotsB};
+        CommonSlotsA =/= CommonSlotsB ->
+            none
+    end;
+glb(#t_fun{arity=any}, #t_fun{}=T) ->
+    T;
+glb(#t_fun{}=T, #t_fun{arity=any}) ->
+    T;
+glb(#t_integer{elements={_,_}}=T, #t_integer{elements=any}) ->
+    T;
+glb(#t_integer{elements=any}, #t_integer{elements={_,_}}=T) ->
+    T;
+glb(#t_integer{elements={MinA,MaxA}}, #t_integer{elements={MinB,MaxB}})
+  when MinA >= MinB, MinA =< MaxB;
+       MinB >= MinA, MinB =< MaxA ->
+    true = MinA =< MaxA andalso MinB =< MaxB,   %Assertion.
+    #t_integer{elements={max(MinA, MinB),min(MaxA, MaxB)}};
+glb(#t_integer{}=T, number) -> T;
+glb(float=T, number) -> T;
+glb(number, #t_integer{}=T) -> T;
+glb(number, float=T) -> T;
+glb(list, cons) -> cons;
+glb(list, nil) -> nil;
+glb(cons, list) -> cons;
+glb(nil, list) -> nil;
+glb(#t_tuple{}=T1, #t_tuple{}=T2) ->
+    glb_tuples(T1, T2);
+glb(#t_bitstring{unit=U1}, #t_bitstring{unit=U2}) ->
+    #t_bitstring{unit=U1 * U2 div gcd(U1, U2)};
+glb(_, _) ->
+    %% Inconsistent types. There will be an exception at runtime.
+    none.
+
+glb_tuples(#t_tuple{size=Sz1,exact=true},
+           #t_tuple{size=Sz2,exact=true}) when Sz1 =/= Sz2 ->
+    none;
+glb_tuples(#t_tuple{size=Sz1,exact=Ex1,elements=Es1},
+           #t_tuple{size=Sz2,exact=Ex2,elements=Es2}) ->
+    Size = max(Sz1, Sz2),
+    Exact = Ex1 or Ex2,
+    case glb_elements(Es1, Es2) of
+        none ->
+            none;
+        Es ->
+            #t_tuple{size=Size,exact=Exact,elements=Es}
+    end.
+
+glb_elements(Es1, Es2) ->
+    Keys = maps:keys(Es1) ++ maps:keys(Es2),
+    glb_elements_1(Keys, Es1, Es2, #{}).
+
+glb_elements_1([Key | Keys], Es1, Es2, Acc) ->
+    case {Es1, Es2} of
+        {#{ Key := Type1 }, #{ Key := Type2 }} ->
+            %% Note the use of meet/2; elements don't need to be normal types.
+            case meet(Type1, Type2) of
+                none -> none;
+                Type -> glb_elements_1(Keys, Es1, Es2, Acc#{ Key => Type })
+            end;
+        {#{ Key := Type1 }, _} ->
+            glb_elements_1(Keys, Es1, Es2, Acc#{ Key => Type1 });
+        {_, #{ Key := Type2 }} ->
+            glb_elements_1(Keys, Es1, Es2, Acc#{ Key => Type2 })
+    end;
+glb_elements_1([], _Es1, _Es2, Acc) ->
+    Acc.
+
+%% Return the least upper bound of the types Type1 and Type2. The LUB is a more
+%% general type than Type1 and Type2, and is always a normal type.
+%%
+%% For example:
+%%
+%%    lub(#t_integer{elements=any}, #t_integer=elements={0,3}}) ->
+%%         #t_integer{}
+%%
+%% The LUB for two different types result in 'any' (not a union type!), which
+%% is the top element for our type lattice:
+%%
+%%    lub(#t_integer{}, #t_map{}) -> any
+
+-spec lub(normal_type(), normal_type()) -> normal_type().
+
+lub(T, T) ->
+    verified_normal_type(T);
+lub(none, T) ->
+    verified_normal_type(T);
+lub(T, none) ->
+    verified_normal_type(T);
+lub(any, _) ->
+    any;
+lub(_, any) ->
+    any;
+lub(#t_atom{elements=[_|_]=Set1}, #t_atom{elements=[_|_]=Set2}) ->
+    Set = ordsets:union(Set1, Set2),
+    case ordsets:size(Set) of
+        Size when Size =< ?ATOM_SET_SIZE ->
+            #t_atom{elements=Set};
+        _Size ->
+            #t_atom{elements=any}
+    end;
+lub(#t_atom{elements=any}=T, #t_atom{elements=[_|_]}) -> T;
+lub(#t_atom{elements=[_|_]}, #t_atom{elements=any}=T) -> T;
+lub(#t_bitstring{unit=U1}, #t_bitstring{unit=U2}) ->
+    #t_bitstring{unit=gcd(U1, U2)};
+lub(#t_fun{}, #t_fun{}) ->
+    #t_fun{};
+lub(#t_integer{elements={MinA,MaxA}},
+    #t_integer{elements={MinB,MaxB}}) ->
+    #t_integer{elements={min(MinA,MinB),max(MaxA,MaxB)}};
+lub(#t_bs_context{slots=SlotsA,valid=ValidA},
+    #t_bs_context{slots=SlotsB,valid=ValidB}) ->
+    #t_bs_context{slots=min(SlotsA, SlotsB),
+                  valid=ValidA band ValidB};
+lub(#t_integer{}, #t_integer{}) -> #t_integer{};
+lub(list, cons) -> list;
+lub(cons, list) -> list;
+lub(nil, cons) -> list;
+lub(cons, nil) -> list;
+lub(nil, list) -> list;
+lub(list, nil) -> list;
+lub(#t_integer{}, float) -> number;
+lub(float, #t_integer{}) -> number;
+lub(#t_integer{}, number) -> number;
+lub(number, #t_integer{}) -> number;
+lub(float, number) -> number;
+lub(number, float) -> number;
+lub(#t_tuple{size=Sz,exact=ExactA,elements=EsA},
+    #t_tuple{size=Sz,exact=ExactB,elements=EsB}) ->
+    Exact = ExactA and ExactB,
+    Es = lub_tuple_elements(Sz, EsA, EsB),
+    #t_tuple{size=Sz,exact=Exact,elements=Es};
+lub(#t_tuple{size=SzA,elements=EsA}, #t_tuple{size=SzB,elements=EsB}) ->
+    Sz = min(SzA, SzB),
+    Es = lub_tuple_elements(Sz, EsA, EsB),
+    #t_tuple{size=Sz,elements=Es};
+lub(_T1, _T2) ->
+    %%io:format("~p ~p\n", [_T1,_T2]),
+    any.
+
+lub_tuple_elements(MinSize, EsA, EsB) ->
+    Es0 = lub_elements(EsA, EsB),
+    maps:filter(fun(Index, _Type) -> Index =< MinSize end, Es0).
+
+lub_elements(Es1, Es2) ->
+    Keys = if
+               map_size(Es1) =< map_size(Es2) -> maps:keys(Es1);
+               map_size(Es1) > map_size(Es2) -> maps:keys(Es2)
+           end,
+    lub_elements_1(Keys, Es1, Es2, #{}).
+
+lub_elements_1([Key | Keys], Es1, Es2, Acc0) ->
+    case {Es1, Es2} of
+        {#{ Key := Type1 }, #{ Key := Type2 }} ->
+            %% Note the use of join/2; elements don't need to be normal types.
+            Acc = set_element_type(Key, join(Type1, Type2), Acc0),
+            lub_elements_1(Keys, Es1, Es2, Acc);
+        {#{}, #{}} ->
+            lub_elements_1(Keys, Es1, Es2, Acc0)
+    end;
+lub_elements_1([], _Es1, _Es2, Acc) ->
+    Acc.
+
+%%
+
+gcd(A, B) ->
+    case A rem B of
+        0 -> B;
+        X -> gcd(B, X)
+    end.
+
+%%
+
+record_key(#t_tuple{exact=true,size=Size,elements=#{ 1 := Tag }}) ->
+    case is_singleton_type(Tag) of
+        true -> {Size, Tag};
+        false -> none
+    end;
+record_key(_) ->
+    none.
+
+new_tuple_set(T) ->
+    case record_key(T) of
+        none -> T;
+        Key -> [{Key, T}]
+    end.
+
+%%
+
+shrink_union(#t_union{other=any}) ->
+    any;
+shrink_union(#t_union{atom=Atom,list=none,number=none,
+                      tuple_set=none,other=none}) ->
+    Atom;
+shrink_union(#t_union{atom=none,list=List,number=none,
+                      tuple_set=none,other=none}) ->
+    List;
+shrink_union(#t_union{atom=none,list=none,number=Number,
+                      tuple_set=none,other=none}) ->
+    Number;
+shrink_union(#t_union{atom=none,list=none,number=none,
+                      tuple_set=#t_tuple{}=Tuple,other=none}) ->
+    Tuple;
+shrink_union(#t_union{atom=none,list=none,number=none,
+                      tuple_set=[{_Key, Record}],other=none}) ->
+    #t_tuple{} = Record;                        %Assertion.
+shrink_union(#t_union{atom=none,list=none,number=none,
+                      tuple_set=none,other=Other}) ->
+    Other;
+shrink_union(#t_union{}=T) ->
+    T.
+
+%% Verifies that the given type is well-formed.
+
+-spec verified_type(T) -> T when
+      T :: type().
+
+verified_type(#t_union{atom=Atom,
+                       list=List,
+                       number=Number,
+                       tuple_set=TSet,
+                       other=Other}=T) ->
+    _ = verified_normal_type(Atom),
+    _ = verified_normal_type(List),
+    _ = verified_normal_type(Number),
+    _ = verify_tuple_set(TSet),
+    _ = verified_normal_type(Other),
+    T;
+verified_type(T) ->
+    verified_normal_type(T).
+
+verify_tuple_set([_|_]=T) ->
+    _ = [verified_normal_type(Rec) || {_, Rec} <- T],
+    T;
+verify_tuple_set(#t_tuple{}=T) ->
+    none = record_key(T),                       %Assertion.
+    T;
+verify_tuple_set(none=T) ->
+    T.
+
+-spec verified_normal_type(T) -> T when
+      T :: normal_type().
+
+verified_normal_type(any=T) -> T;
+verified_normal_type(none=T) -> T;
+verified_normal_type(#t_atom{elements=any}=T) -> T;
+verified_normal_type(#t_atom{elements=[_|_]}=T) -> T;
+verified_normal_type(#t_bitstring{unit=U}=T)
+  when is_integer(U), U >= 1 ->
+    T;
+verified_normal_type(#t_bs_context{}=T) -> T;
+verified_normal_type(#t_fun{arity=Arity}=T)
+  when Arity =:= any; is_integer(Arity) ->
+    T;
+verified_normal_type(float=T) -> T;
+verified_normal_type(#t_integer{elements=any}=T) -> T;
+verified_normal_type(#t_integer{elements={Min,Max}}=T)
+  when is_integer(Min), is_integer(Max), Min =< Max ->
+    T;
+verified_normal_type(list=T) -> T;
+verified_normal_type(#t_map{}=T) -> T;
+verified_normal_type(nil=T) -> T;
+verified_normal_type(cons=T) -> T;
+verified_normal_type(number=T) -> T;
+verified_normal_type(#t_tuple{size=Size,elements=Es}=T) ->
+    %% All known elements must have a valid index and type (which may be a
+    %% union). 'any' is prohibited since it's implicit and should never be
+    %% present in the map, and a 'none' element ought to have reduced the
+    %% entire tuple to 'none'.
+    maps:fold(fun(Index, Element, _) when is_integer(Index),
+                                          1 =< Index, Index =< Size,
+                                          Element =/= any, Element =/= none ->
+                      verified_type(Element)
+              end, [], Es),
+    T.
diff --git a/lib/compiler/src/beam_types.hrl b/lib/compiler/src/beam_types.hrl
index 825eca4c64..09f87d61ba 100644
--- a/lib/compiler/src/beam_types.hrl
+++ b/lib/compiler/src/beam_types.hrl
@@ -37,10 +37,15 @@
 %%       -- cons         Cons (nonempty list).
 %%       -- nil          The empty list.
 %%    - #t_tuple{}       Tuple.
-%%    - #t_abstract{}    Psuedo-type used in the validator to track tuples
-%                        under construction, match context positions, etc.
 %%
 %%  none                 No type (bottom element).
+%%
+%% We also use #t_union{} to represent conflicting types produced by certain
+%% expressions, e.g. the "#t_atom{} or #t_tuple{}" of lists:keyfind/3, which is
+%% very useful for preserving type information when we would otherwise have
+%% reduced it to 'any'. Since few operations can make direct use of this extra
+%% type information, types should generally be normalized to one of the above
+%% before use.
 
 -define(ATOM_SET_SIZE, 5).
 
@@ -54,7 +59,6 @@
 -record(t_tuple, {size=0 :: integer(),
                   exact=false :: boolean(),
                   elements=#{} :: tuple_elements()}).
--record(t_abstract, {kind :: atom()}).
 
 %% Known element types, unknown elements are assumed to be 'any'. The key is
 %% a 1-based integer index for tuples, and a plain literal for maps (that is,
@@ -65,8 +69,20 @@
 
 -type elements() :: tuple_elements() | map_elements().
 
--type type() :: any | none |
-                list | number |
-                #t_atom{} | #t_bitstring{} | #t_bs_context{} | #t_fun{} |
-                #t_integer{} | #t_map{} | #t_tuple{} | #t_abstract{} |
-                'cons' | 'float' | 'nil'.
+-type normal_type() :: any | none |
+                       list | number |
+                       #t_atom{} | #t_bitstring{} | #t_bs_context{} |
+                       #t_fun{} | #t_integer{} | #t_map{} | #t_tuple{} |
+                       'cons' | 'float' | 'nil'.
+
+-type record_key() :: {Arity :: integer(), Tag :: normal_type() }.
+-type record_set() :: ordsets:ordset({record_key(), #t_tuple{}}).
+-type tuple_set() :: #t_tuple{} | record_set().
+
+-record(t_union, {atom=none :: none | #t_atom{},
+                  list=none :: none | list | cons | nil,
+                  number=none :: none | number | float | #t_integer{},
+                  tuple_set=none :: none | tuple_set(),
+                  other=none :: normal_type()}).
+
+-type type() :: #t_union{} | normal_type().
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 90049b3a25..fd265fe174 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -111,8 +111,15 @@ validate_0(Module, [{function,Name,Ar,Entry,Code}|Fs], Ft) ->
 	    erlang:raise(Class, Error, Stack)
     end.
 
+-record(t_abstract, {kind}).
+
+%% The types are the same as in 'beam_types.hrl', with the addition of
+%% #t_abstract{} that describes tuples under construction, match context
+%% positions, and so on.
+-type validator_type() :: #t_abstract{} | type().
+
 -record(value_ref, {id :: index()}).
--record(value, {op :: term(), args :: [argument()], type :: type()}).
+-record(value, {op :: term(), args :: [argument()], type :: validator_type()}).
 
 -type argument() :: #value_ref{} | literal().
 
@@ -124,6 +131,24 @@ validate_0(Module, [{function,Name,Ar,Entry,Code}|Fs], Ft) ->
                    {literal, term()} |
                    nil.
 
+%% Register tags describe the state of the register rather than the value they
+%% contain (if any).
+%%
+%% initialized          The register has been initialized with some valid term
+%%                      so that it is safe to pass to the garbage collector.
+%%                      NOT safe to use in any other way (will not crash the
+%%                      emulator, but clearly points to a bug in the compiler).
+%%
+%% uninitialized        The register contains any old garbage and can not be
+%%                      passed to the garbage collector.
+%%
+%% {catchtag,[Lbl]}     A special term used within a catch. Must only be used
+%%                      by the catch instructions; NOT safe to use in other
+%%                      instructions.
+%%
+%% {trytag,[Lbl]}       A special term used within a try block. Must only be
+%%                      used by the catch instructions; NOT safe to use in other
+%%                      instructions.
 -type tag() :: initialized |
                uninitialized |
                {catchtag, [label()]} |
@@ -328,7 +353,7 @@ valfun_1({try_case_end,Src}, Vst) ->
     kill_state(Vst);
 %% Instructions that cannot cause exceptions
 valfun_1({bs_get_tail,Ctx,Dst,Live}, Vst0) ->
-    bsm_validate_context(Ctx, Vst0),
+    assert_type(#t_bs_context{}, Ctx, Vst0),
     verify_live(Live, Vst0),
     verify_y_init(Vst0),
     Vst = prune_x_regs(Live, Vst0),
@@ -371,7 +396,7 @@ valfun_1({init,Reg}, Vst) ->
 valfun_1({test_heap,Heap,Live}, Vst) ->
     test_heap(Heap, Live, Vst);
 valfun_1({bif,Op,{f,_},Ss,Dst}=I, Vst) ->
-    case beam_call_types:never_throws(erlang, Op, length(Ss)) of
+    case erl_bifs:is_safe(erlang, Op, length(Ss)) of
         true ->
             %% It can't fail, so we finish handling it here (not updating
             %% catch state).
@@ -401,7 +426,7 @@ valfun_1({put_tuple2,Dst,{list,Elements}}, Vst0) ->
     create_term(Type, put_tuple2, [], Dst, Vst);
 valfun_1({put_tuple,Sz,Dst}, Vst0) when is_integer(Sz) ->
     Vst1 = eat_heap(1, Vst0),
-    Vst = create_term(#t_abstract{kind=tuple_in_progress}, put_tuple, [],
+    Vst = create_term(#t_abstract{kind=unfinished_tuple}, put_tuple, [],
                       Dst, Vst1),
     #vst{current=St0} = Vst,
     St = St0#st{puts_left={Sz,{Dst,Sz,#{}}}},
@@ -545,7 +570,7 @@ valfun_1({get_tuple_element,Src,N,Dst}, Vst) ->
     Index = N+1,
     assert_not_literal(Src),
     assert_type(#t_tuple{size=Index}, Src, Vst),
-    #t_tuple{elements=Es} = get_term_type(Src, Vst),
+    #t_tuple{elements=Es} = normalize(get_term_type(Src, Vst)),
     Type = beam_types:get_element_type(Index, Es),
     extract_term(Type, {bif,element}, [{integer,Index}, Src], Dst, Vst);
 valfun_1({jump,{f,Lbl}}, Vst) ->
@@ -737,7 +762,7 @@ valfun_4({set_tuple_element,Src,Tuple,N}, Vst) ->
     %% helpers as we must support overwriting (rather than just widening or
     %% narrowing) known elements, and we can't use extract_term either since
     %% the source tuple may be aliased.
-    #t_tuple{elements=Es0}=Type = get_term_type(Tuple, Vst),
+    #t_tuple{elements=Es0}=Type = normalize(get_term_type(Tuple, Vst)),
     Es = beam_types:set_element_type(I, get_term_type(Src, Vst), Es0),
     override_type(Type#t_tuple{elements=Es}, Tuple, Vst);
 %% Match instructions.
@@ -756,17 +781,17 @@ valfun_4({test,bs_start_match3,{f,Fail},Live,[Src],Dst}, Vst) ->
 valfun_4({test,bs_start_match2,{f,Fail},Live,[Src,Slots],Dst}, Vst) ->
     validate_bs_start_match(Fail, Live, bsm_match_state(Slots), Src, Dst, Vst);
 valfun_4({test,bs_match_string,{f,Fail},[Ctx,_,_]}, Vst) ->
-    bsm_validate_context(Ctx, Vst),
+    assert_type(#t_bs_context{}, Ctx, Vst),
     branch(Fail, Vst, fun(V) -> V end);
 valfun_4({test,bs_skip_bits2,{f,Fail},[Ctx,Src,_,_]}, Vst) ->
-    bsm_validate_context(Ctx, Vst),
+    assert_type(#t_bs_context{}, Ctx, Vst),
     assert_term(Src, Vst),
     branch(Fail, Vst, fun(V) -> V end);
 valfun_4({test,bs_test_tail2,{f,Fail},[Ctx,_]}, Vst) ->
-    bsm_validate_context(Ctx, Vst),
+    assert_type(#t_bs_context{}, Ctx, Vst),
     branch(Fail, Vst, fun(V) -> V end);
 valfun_4({test,bs_test_unit,{f,Fail},[Ctx,_]}, Vst) ->
-    bsm_validate_context(Ctx, Vst),
+    assert_type(#t_bs_context{}, Ctx, Vst),
     branch(Fail, Vst, fun(V) -> V end);
 valfun_4({test,bs_skip_utf8,{f,Fail},[Ctx,Live,_]}, Vst) ->
     validate_bs_skip_utf(Fail, Ctx, Live, Vst);
@@ -807,14 +832,14 @@ valfun_4({bs_save2,Ctx,SavePoint}, Vst) ->
 valfun_4({bs_restore2,Ctx,SavePoint}, Vst) ->
     bsm_restore(Ctx, SavePoint, Vst);
 valfun_4({bs_get_position, Ctx, Dst, Live}, Vst0) ->
-    bsm_validate_context(Ctx, Vst0),
+    assert_type(#t_bs_context{}, Ctx, Vst0),
     verify_live(Live, Vst0),
     verify_y_init(Vst0),
     Vst = prune_x_regs(Live, Vst0),
     create_term(#t_abstract{kind=ms_position}, bs_get_position, [Ctx],
                 Dst, Vst, Vst0);
 valfun_4({bs_set_position, Ctx, Pos}, Vst) ->
-    bsm_validate_context(Ctx, Vst),
+    assert_type(#t_bs_context{}, Ctx, Vst),
     assert_type(#t_abstract{kind=ms_position}, Pos, Vst),
     Vst;
 
@@ -1028,8 +1053,11 @@ verify_get_map(Fail, Src, List, Vst0) ->
 %%    {get_map_elements,{f,7},{x,1},{list,[{atom,a},{x,1},{atom,b},{x,2}]}}.
 %%
 %% If 'a' exists but not 'b', {x,1} is overwritten when we jump to {f,7}.
+%%
+%% We must be careful to preserve the uninitialized status for Y registers
+%% that have been allocated but not yet defined.
 clobber_map_vals([Key,Dst|T], Map, Vst0) ->
-    case is_reg_defined(Dst, Vst0) of
+    case is_reg_initialized(Dst, Vst0) of
         true ->
             Vst = extract_term(any, {bif,map_get}, [Key, Map], Dst, Vst0),
             clobber_map_vals(T, Map, Vst);
@@ -1039,6 +1067,17 @@ clobber_map_vals([Key,Dst|T], Map, Vst0) ->
 clobber_map_vals([], _Map, Vst) ->
     Vst.
 
+is_reg_initialized({x,_}=Reg, #vst{current=#st{xs=Xs}}) ->
+    is_map_key(Reg, Xs);
+is_reg_initialized({y,_}=Reg, #vst{current=#st{ys=Ys}}) ->
+    case Ys of
+        #{Reg:=Val} ->
+            Val =/= uninitialized;
+        #{} ->
+            false
+    end;
+is_reg_initialized(V, #vst{}) -> error({not_a_register, V}).
+
 extract_map_keys([Key,_Val|T]) ->
     [Key|extract_map_keys(T)];
 extract_map_keys([]) -> [].
@@ -1125,7 +1164,7 @@ validate_bs_start_match(Fail, Live, Type, Src, Dst, Vst) ->
 %% Common code for validating bs_get* instructions.
 %%
 validate_bs_get(Op, Fail, Ctx, Live, Type, Dst, Vst) ->
-    bsm_validate_context(Ctx, Vst),
+    assert_type(#t_bs_context{}, Ctx, Vst),
     verify_live(Live, Vst),
     verify_y_init(Vst),
 
@@ -1139,7 +1178,7 @@ validate_bs_get(Op, Fail, Ctx, Live, Type, Dst, Vst) ->
 %% Common code for validating bs_skip_utf* instructions.
 %%
 validate_bs_skip_utf(Fail, Ctx, Live, Vst) ->
-    bsm_validate_context(Ctx, Vst),
+    assert_type(#t_bs_context{}, Ctx, Vst),
     verify_y_init(Vst),
     verify_live(Live, Vst),
 
@@ -1462,44 +1501,35 @@ bsm_match_state() ->
 bsm_match_state(Slots) ->
     #t_bs_context{slots=Slots}.
 
-bsm_validate_context(Reg, Vst) ->
-    _ = bsm_get_context(Reg, Vst),
-    ok.
-
-bsm_get_context({Kind,_}=Reg, Vst) when Kind =:= x; Kind =:= y->
-    case get_movable_term_type(Reg, Vst) of
-        #t_bs_context{}=Ctx -> Ctx;
-        _ -> error({no_bsm_context,Reg})
-    end;
-bsm_get_context(Reg, _) ->
-    error({bad_source,Reg}).
-
 bsm_save(Reg, {atom,start}, Vst) ->
     %% Save point refering to where the match started.
     %% It is always valid. But don't forget to validate the context register.
-    bsm_validate_context(Reg, Vst),
+    assert_type(#t_bs_context{}, Reg, Vst),
     Vst;
 bsm_save(Reg, SavePoint, Vst) ->
-    case bsm_get_context(Reg, Vst) of
-	#t_bs_context{valid=Bits,slots=Slots}=Ctxt0 when SavePoint < Slots ->
-	    Ctx = Ctxt0#t_bs_context{valid=Bits bor (1 bsl SavePoint),slots=Slots},
-	    override_type(Ctx, Reg, Vst);
-	_ -> error({illegal_save,SavePoint})
+    case get_movable_term_type(Reg, Vst) of
+        #t_bs_context{valid=Bits,slots=Slots}=Ctxt0 when SavePoint < Slots ->
+            Ctx = Ctxt0#t_bs_context{valid=Bits bor (1 bsl SavePoint),
+                                     slots=Slots},
+            override_type(Ctx, Reg, Vst);
+        _ ->
+            error({illegal_save, SavePoint})
     end.
 
 bsm_restore(Reg, {atom,start}, Vst) ->
     %% (Mostly) automatic save point refering to where the match started.
     %% It is always valid. But don't forget to validate the context register.
-    bsm_validate_context(Reg, Vst),
+    assert_type(#t_bs_context{}, Reg, Vst),
     Vst;
 bsm_restore(Reg, SavePoint, Vst) ->
-    case bsm_get_context(Reg, Vst) of
-	#t_bs_context{valid=Bits,slots=Slots} when SavePoint < Slots ->
-	    case Bits band (1 bsl SavePoint) of
-		0 -> error({illegal_restore,SavePoint,not_set});
-		_ -> Vst
-	    end;
-	_ -> error({illegal_restore,SavePoint,range})
+    case get_movable_term_type(Reg, Vst) of
+        #t_bs_context{valid=Bits,slots=Slots} when SavePoint < Slots ->
+            case Bits band (1 bsl SavePoint) of
+                0 -> error({illegal_restore, SavePoint, not_set});
+                _ -> Vst
+            end;
+        _ ->
+            error({illegal_restore, SavePoint, range})
     end.
 
 validate_select_val(_Fail, _Choices, _Src, #vst{current=none}=Vst) ->
@@ -1515,7 +1545,7 @@ validate_select_val(Fail, [Val,{f,L}|T], Src, Vst0) ->
                          update_ne_types(Src, Val, FailVst)
                  end),
     validate_select_val(Fail, T, Src, Vst);
-validate_select_val(Fail, [], _, Vst) ->
+validate_select_val(Fail, [], _Src, Vst) ->
     branch(Fail, Vst,
            fun(SuccVst) ->
                    %% The next instruction is never executed.
@@ -1543,83 +1573,94 @@ validate_select_tuple_arity(Fail, [], _, #vst{}=Vst) ->
                    kill_state(SuccVst)
            end).
 
-infer_types({Kind,_}=Reg, Vst) when Kind =:= x; Kind =:= y ->
-    infer_types(get_reg_vref(Reg, Vst), Vst);
-infer_types(#value_ref{}=Ref, #vst{current=#st{vs=Vs}}) ->
+%%
+%% Infers types from comparisons, looking at the expressions that produced the
+%% compared values and updates their types if we've learned something new from
+%% the comparison.
+%%
+
+infer_types(CompareOp, {Kind,_}=LHS, RHS, Vst) when Kind =:= x; Kind =:= y ->
+    infer_types(CompareOp, get_reg_vref(LHS, Vst), RHS, Vst);
+infer_types(CompareOp, LHS, {Kind,_}=RHS, Vst) when Kind =:= x; Kind =:= y ->
+    infer_types(CompareOp, LHS, get_reg_vref(RHS, Vst), Vst);
+infer_types(CompareOp, LHS, RHS, #vst{current=#st{vs=Vs}}=Vst0) ->
     case Vs of
-        #{ Ref := Entry } -> infer_types_1(Entry);
-        #{} -> fun(_, S) -> S end
+        #{ LHS := LEntry, RHS := REntry } ->
+            Vst = infer_types_1(LEntry, RHS, CompareOp, Vst0),
+            infer_types_1(REntry, LHS, CompareOp, Vst);
+        #{ LHS := LEntry } ->
+            infer_types_1(LEntry, RHS, CompareOp, Vst0);
+        #{ RHS := REntry } ->
+            infer_types_1(REntry, LHS, CompareOp, Vst0);
+        #{} ->
+            Vst0
+    end.
+
+infer_types_1(#value{op={bif,'=:='},args=[LHS,RHS]}, Val, Op, Vst) ->
+    case Val of
+        {atom, Bool} when Op =:= eq_exact, Bool; Op =:= ne_exact, not Bool ->
+            update_eq_types(LHS, RHS, Vst);
+        {atom, Bool} when Op =:= ne_exact, Bool; Op =:= eq_exact, not Bool ->
+            update_ne_types(LHS, RHS, Vst);
+        _ ->
+            Vst
     end;
-infer_types(_, #vst{}) ->
-    fun(_, S) -> S end.
-
-infer_types_1(#value{op={bif,'=:='},args=[LHS,RHS]}) ->
-    fun({atom,true}, S) ->
-            %% Either side might contain something worth inferring, so we need
-            %% to check them both.
-            Infer_L = infer_types(RHS, S),
-            Infer_R = infer_types(LHS, S),
-            Infer_R(RHS, Infer_L(LHS, S));
-       (_, S) -> S
+infer_types_1(#value{op={bif,'=/='},args=[LHS,RHS]}, Val, Op, Vst) ->
+    case Val of
+        {atom, Bool} when Op =:= ne_exact, Bool; Op =:= eq_exact, not Bool ->
+            update_ne_types(LHS, RHS, Vst);
+        {atom, Bool} when Op =:= eq_exact, Bool; Op =:= ne_exact, not Bool ->
+            update_eq_types(LHS, RHS, Vst);
+        _ ->
+            Vst
     end;
-infer_types_1(#value{op={bif,element},args=[{integer,Index},Tuple]}) ->
-    fun(Val, S) ->
-            case is_value_alive(Tuple, S) of
-                true ->
-                    ElementType = get_term_type(Val, S),
-                    Es = beam_types:set_element_type(Index, ElementType, #{}),
-                    Type = #t_tuple{size=Index,elements=Es},
-                    update_type(fun meet/2, Type, Tuple, S);
-                false ->
-                    S
-            end
+infer_types_1(#value{op={bif,element},args=[{integer,Index},Tuple]},
+              Val, Op, Vst) when Index >= 1 ->
+    ElementType = get_term_type(Val, Vst),
+    Es = beam_types:set_element_type(Index, ElementType, #{}),
+    Type = #t_tuple{size=Index,elements=Es},
+    case Op of
+        eq_exact -> update_type(fun meet/2, Type, Tuple, Vst);
+        ne_exact -> update_type(fun subtract/2, Type, Tuple, Vst)
     end;
-infer_types_1(#value{op={bif,is_atom},args=[Src]}) ->
-    infer_type_test_bif(#t_atom{}, Src);
-infer_types_1(#value{op={bif,is_boolean},args=[Src]}) ->
-    infer_type_test_bif(beam_types:make_boolean(), Src);
-infer_types_1(#value{op={bif,is_binary},args=[Src]}) ->
-    infer_type_test_bif(#t_bitstring{unit=8}, Src);
-infer_types_1(#value{op={bif,is_bitstring},args=[Src]}) ->
-    infer_type_test_bif(#t_bitstring{}, Src);
-infer_types_1(#value{op={bif,is_float},args=[Src]}) ->
-    infer_type_test_bif(float, Src);
-infer_types_1(#value{op={bif,is_integer},args=[Src]}) ->
-    infer_type_test_bif(#t_integer{}, Src);
-infer_types_1(#value{op={bif,is_list},args=[Src]}) ->
-    infer_type_test_bif(list, Src);
-infer_types_1(#value{op={bif,is_map},args=[Src]}) ->
-    infer_type_test_bif(#t_map{}, Src);
-infer_types_1(#value{op={bif,is_number},args=[Src]}) ->
-    infer_type_test_bif(number, Src);
-infer_types_1(#value{op={bif,is_tuple},args=[Src]}) ->
-    infer_type_test_bif(#t_tuple{}, Src);
-infer_types_1(#value{op={bif,tuple_size}, args=[Tuple]}) ->
-    fun({integer,Arity}, S) ->
-            case is_value_alive(Tuple, S) of
-                true ->
-                    Type = #t_tuple{exact=true,size=Arity},
-                    update_type(fun meet/2, Type, Tuple, S);
-                false ->
-                    S
-            end;
-       (_, S) -> S
+infer_types_1(#value{op={bif,is_atom},args=[Src]}, Val, Op, Vst) ->
+    infer_type_test_bif(#t_atom{}, Src, Val, Op, Vst);
+infer_types_1(#value{op={bif,is_boolean},args=[Src]}, Val, Op, Vst) ->
+    infer_type_test_bif(beam_types:make_boolean(), Src, Val, Op, Vst);
+infer_types_1(#value{op={bif,is_binary},args=[Src]}, Val, Op, Vst) ->
+    infer_type_test_bif(#t_bitstring{unit=8}, Src, Val, Op, Vst);
+infer_types_1(#value{op={bif,is_bitstring},args=[Src]}, Val, Op, Vst) ->
+    infer_type_test_bif(#t_bitstring{}, Src, Val, Op, Vst);
+infer_types_1(#value{op={bif,is_float},args=[Src]}, Val, Op, Vst) ->
+    infer_type_test_bif(float, Src, Val, Op, Vst);
+infer_types_1(#value{op={bif,is_integer},args=[Src]}, Val, Op, Vst) ->
+    infer_type_test_bif(#t_integer{}, Src, Val, Op, Vst);
+infer_types_1(#value{op={bif,is_list},args=[Src]}, Val, Op, Vst) ->
+    infer_type_test_bif(list, Src, Val, Op, Vst);
+infer_types_1(#value{op={bif,is_map},args=[Src]}, Val, Op, Vst) ->
+    infer_type_test_bif(#t_map{}, Src, Val, Op, Vst);
+infer_types_1(#value{op={bif,is_number},args=[Src]}, Val, Op, Vst) ->
+    infer_type_test_bif(number, Src, Val, Op, Vst);
+infer_types_1(#value{op={bif,is_tuple},args=[Src]}, Val, Op, Vst) ->
+    infer_type_test_bif(#t_tuple{}, Src, Val, Op, Vst);
+infer_types_1(#value{op={bif,tuple_size}, args=[Tuple]},
+              {integer,Arity}, Op, Vst) ->
+    Type = #t_tuple{exact=true,size=Arity},
+    case Op of
+        eq_exact -> update_type(fun meet/2, Type, Tuple, Vst);
+        ne_exact -> update_type(fun subtract/2, Type, Tuple, Vst)
     end;
-infer_types_1(_) ->
-    fun(_, S) -> S end.
-
-infer_type_test_bif(Type, Src) ->
-    fun({atom,Bool}, S) when is_boolean(Bool) ->
-            case is_value_alive(Src, S) of
-                true when Bool =:= true ->
-                    update_type(fun meet/2, Type, Src, S);
-                true when Bool =:= false ->
-                    update_type(fun subtract/2, Type, Src, S);
-                false ->
-                    S
-            end;
-       (_, S) ->
-            S
+infer_types_1(_, _, _, Vst) ->
+    Vst.
+
+infer_type_test_bif(Type, Src, Val, Op, Vst) ->
+    case Val of
+        {atom, Bool} when Op =:= eq_exact, Bool; Op =:= ne_exact, not Bool ->
+            update_type(fun meet/2, Type, Src, Vst);
+        {atom, Bool} when Op =:= ne_exact, Bool; Op =:= eq_exact, not Bool ->
+            update_type(fun subtract/2, Type, Src, Vst);
+        _ ->
+            Vst
     end.
 
 %%%
@@ -1738,33 +1779,22 @@ update_type(Merge, With, Literal, Vst) ->
         _Type -> Vst
     end.
 
-update_ne_types(LHS, {atom,Bool}=RHS, Vst) when is_boolean(Bool) ->
-    %% This is a stopgap to make negative inference work for type test BIFs
-    %% like is_tuple. Consider the following unoptimized code:
-    %%
-    %%    {call_ext,2,{extfunc,erlang,'--',2}}.
-    %%    {bif,is_tuple,{f,0},[{x,0}],{x,1}}.
-    %%    {test,is_eq_exact,{x,1},{f,2},{atom,false}}.
-    %%    ... snip ...
-    %%    {label,1}.
-    %%    {test,is_eq_exact,{x,1},{f,1},{atom,true}}.
-    %%    ... unreachable because {x,0} is known to be a list, so {x,1} can't
-    %%        be true ...
-    %%    {label,2}.
-    %%    ... unreachable because {x,1} is neither true nor false! ...
-    %%
-    %% If we fail to determine that the first is_eq_exact never fails, our
-    %% state will be inconsistent after the second is_eq_exact check; we know
-    %% for certain that {x,0} is a list so infer_types says it can't succeed,
-    %% but it can't fail either because we also know that {x,1} is a boolean,
-    %% and the first check ruled out 'false'.
-    LType = get_term_type(LHS, Vst),
-    RType = get_term_type(RHS, Vst),
-    case beam_types:is_boolean_type(LType) of
-        true -> update_eq_types(LHS, {atom, not Bool}, Vst);
-        false -> update_type(fun subtract/2, RType, LHS, Vst)
-    end;
-update_ne_types(LHS, RHS, Vst) ->
+update_eq_types(LHS, RHS, Vst0) ->
+    LType = get_term_type(LHS, Vst0),
+    RType = get_term_type(RHS, Vst0),
+
+    Vst1 = update_type(fun meet/2, RType, LHS, Vst0),
+    Vst = update_type(fun meet/2, LType, RHS, Vst1),
+
+    infer_types(eq_exact, LHS, RHS, Vst).
+
+update_ne_types(LHS, RHS, Vst0) ->
+    Vst1 = update_ne_types_1(LHS, RHS, Vst0),
+    Vst = update_ne_types_1(RHS, LHS, Vst1),
+
+    infer_types(ne_exact, LHS, RHS, Vst).
+
+update_ne_types_1(LHS, RHS, Vst0) ->
     %% While updating types on equality is fairly straightforward, inequality
     %% is a bit trickier since all we know is that the *value* of LHS differs
     %% from RHS, so we can't blindly subtract their types.
@@ -1774,25 +1804,25 @@ update_ne_types(LHS, RHS, Vst) ->
     %% #t_integer{} we would erroneously infer that the new type is float.
     %%
     %% Therefore, we only subtract when we know that RHS has a specific value.
-    RType = get_term_type(RHS, Vst),
+    RType = get_term_type(RHS, Vst0),
     case beam_types:is_singleton_type(RType) of
-        true -> update_type(fun subtract/2, RType, LHS, Vst);
-        false -> Vst
+        true ->
+            Vst = update_type(fun subtract/2, RType, LHS, Vst0),
+
+            %% If LHS has a specific value after subtraction we can infer types
+            %% as if we've made an exact match, which is much stronger than
+            %% ne_exact.
+            LType = get_term_type(LHS, Vst),
+            case beam_types:get_singleton_value(LType) of
+                {ok, Value} ->
+                    infer_types(eq_exact, LHS, value_to_literal(Value), Vst);
+                error ->
+                    Vst
+            end;
+        false ->
+            Vst0
     end.
 
-update_eq_types(LHS, RHS, Vst0) ->
-    %% Either side might contain something worth inferring, so we need
-    %% to check them both.
-    Infer_L = infer_types(RHS, Vst0),
-    Infer_R = infer_types(LHS, Vst0),
-    Vst1 = Infer_R(RHS, Infer_L(LHS, Vst0)),
-
-    T1 = get_term_type(LHS, Vst1),
-    T2 = get_term_type(RHS, Vst1),
-
-    Vst = update_type(fun meet/2, T2, LHS, Vst1),
-    update_type(fun meet/2, T1, RHS, Vst).
-
 %% Helper functions for the above.
 
 assign_1(Src, Dst, Vst0) ->
@@ -1873,10 +1903,6 @@ check_try_catch_tags(Type, {y,N}=Reg, Vst) ->
             ok
     end.
 
-is_reg_defined({x,_}=Reg, #vst{current=#st{xs=Xs}}) -> is_map_key(Reg, Xs);
-is_reg_defined({y,_}=Reg, #vst{current=#st{ys=Ys}}) -> is_map_key(Reg, Ys);
-is_reg_defined(V, #vst{}) -> error({not_a_register, V}).
-
 assert_term(Src, Vst) ->
     _ = get_term_type(Src, Vst),
     ok.
@@ -1904,43 +1930,43 @@ is_literal({integer,I}) when is_integer(I) -> true;
 is_literal({literal,_L}) -> true;
 is_literal(_) -> false.
 
-%% The possible types.
-%%
-%% First non-term types:
-%%
-%% initialized          Only for Y registers. Means that the Y register
-%%                      has been initialized with some valid term so that
-%%	                    it is safe to pass to the garbage collector.
-%%	                    NOT safe to use in any other way (will not crash the
-%%	                    emulator, but clearly points to a bug in the compiler).
-%%
-%% {catchtag,[Lbl]}     A special term used within a catch. Must only be used
-%%	                    by the catch instructions; NOT safe to use in other
-%%	                    instructions.
-%%
-%% {trytag,[Lbl]}       A special term used within a try block. Must only be
-%%	                    used by the catch instructions; NOT safe to use in other
-%%	                    instructions.
-%%
-%% #t_bs_context{}	    A match context for bit syntax matching. We do allow
-%%	                    it to moved/to from stack, but otherwise it must only
-%%	                    be accessed by bit syntax matching instructions.
+%% `dialyzer` complains about the float and general literal cases never being
+%% matched and I don't like suppressing warnings. Should they become possible
+%% I'm sure `dialyzer` will warn about it.
+value_to_literal([]) -> nil;
+value_to_literal(A) when is_atom(A) -> {atom,A};
+value_to_literal(I) when is_integer(I) -> {integer,I}.
+
+%% These are just wrappers around their equivalents in beam_types, which
+%% handle the validator-specific #t_abstract{} type.
 %%
-%% These are simple wrappers around
+%% The funny-looking abstract types produced here are intended to provoke
+%% errors on actual use; they do no harm just lying around.
+
+normalize(#t_abstract{}=A) -> error({abstract_type, A});
+normalize(T) -> beam_types:normalize(T).
 
-join(#t_abstract{}=Same, #t_abstract{}=Same) -> Same;
+join(Same, Same) -> Same;
+join(#t_abstract{}=A, B) -> #t_abstract{kind={join, A, B}};
+join(A, #t_abstract{}=B) -> #t_abstract{kind={join, A, B}};
 join(A, B) -> beam_types:join(A, B).
 
-meet(#t_abstract{}=Same, #t_abstract{}=Same) -> Same;
+meet(Same, Same) -> Same;
+meet(#t_abstract{}=A, B) -> #t_abstract{kind={meet, A, B}};
+meet(A, #t_abstract{}=B) -> #t_abstract{kind={meet, A, B}};
 meet(A, B) -> beam_types:meet(A, B).
 
+subtract(#t_abstract{}=A, B) -> #t_abstract{kind={subtract, A, B}};
+subtract(A, #t_abstract{}=B) -> #t_abstract{kind={subtract, A, B}};
 subtract(A, B) -> beam_types:subtract(A, B).
 
 assert_type(RequiredType, Term, Vst) ->
-    GivenType = get_term_type(Term, Vst),
+    GivenType = get_movable_term_type(Term, Vst),
     case meet(RequiredType, GivenType) of
-        GivenType -> ok;
-        _RequiredType -> error({bad_type,{needed,RequiredType},{actual,GivenType}})
+        GivenType ->
+            ok;
+        _RequiredType ->
+            error({bad_type,{needed,RequiredType},{actual,GivenType}})
     end.
 
 validate_src(Ss, Vst) when is_list(Ss) ->
@@ -1954,6 +1980,7 @@ validate_src(Ss, Vst) when is_list(Ss) ->
 get_term_type(Src, Vst) ->
     case get_movable_term_type(Src, Vst) of
         #t_bs_context{} -> error({match_context,Src});
+        #t_abstract{} -> error({abstract_term,Src});
         Type -> Type
     end.
 
@@ -1963,7 +1990,7 @@ get_term_type(Src, Vst) ->
 
 get_movable_term_type(Src, Vst) ->
     case get_raw_type(Src, Vst) of
-        #t_abstract{kind=tuple_in_progress=Kind} -> error({Kind,Src});
+        #t_abstract{kind=unfinished_tuple=Kind} -> error({Kind,Src});
         initialized -> error({unassigned,Src});
         uninitialized -> error({uninitialized_reg,Src});
         {catchtag,_} -> error({catchtag,Src});
@@ -2009,11 +2036,6 @@ get_raw_type(#value_ref{}=Ref, #vst{current=#st{vs=Vs}}) ->
 get_raw_type(Src, #vst{}) ->
     get_literal_type(Src).
 
-is_value_alive(#value_ref{}=Ref, #vst{current=#st{vs=Vs}}) ->
-    is_map_key(Ref, Vs);
-is_value_alive(_, _) ->
-    false.
-
 get_literal_type(nil) -> 
     beam_types:make_type_from_value([]);
 get_literal_type({atom,A}) when is_atom(A) -> 
@@ -2183,25 +2205,44 @@ merge_vrefs(RefA, RefB, Merge, Counter) ->
 
 merge_values(Merge, VsA, VsB) ->
     maps:fold(fun(Spec, New, Acc) ->
-                      merge_values_1(Spec, New, VsA, VsB, Acc)
+                      mv_1(Spec, New, VsA, VsB, Acc)
               end, #{}, Merge).
 
-merge_values_1(Same, Same, VsA, VsB, Acc) ->
+mv_1(Same, Same, VsA, VsB, Acc0) ->
     %% We're merging different versions of the same value, so it's safe to
     %% reuse old entries if the type's unchanged.
-    #value{type=TypeA}=EntryA = map_get(Same, VsA),
-    #value{type=TypeB}=EntryB = map_get(Same, VsB),
+    #value{type=TypeA,args=Args}=EntryA = map_get(Same, VsA),
+    #value{type=TypeB,args=Args}=EntryB = map_get(Same, VsB),
+
     Entry = case join(TypeA, TypeB) of
                 TypeA -> EntryA;
                 TypeB -> EntryB;
                 JoinedType -> EntryA#value{type=JoinedType}
             end,
-    Acc#{ Same => Entry };
-merge_values_1({RefA, RefB}, New, VsA, VsB, Acc) ->
+
+    Acc = Acc0#{ Same => Entry },
+
+    %% Type inference may depend on values that are no longer reachable from a
+    %% register, so all arguments must be merged into the new state.
+    mv_args(Args, VsA, VsB, Acc);
+mv_1({RefA, RefB}, New, VsA, VsB, Acc) ->
     #value{type=TypeA} = map_get(RefA, VsA),
     #value{type=TypeB} = map_get(RefB, VsB),
     Acc#{ New => #value{op=join,args=[],type=join(TypeA, TypeB)} }.
 
+mv_args([#value_ref{}=Arg | Args], VsA, VsB, Acc0) ->
+    case Acc0 of
+        #{ Arg := _ } ->
+            mv_args(Args, VsA, VsB, Acc0);
+        #{} ->
+            Acc = mv_1(Arg, Arg, VsA, VsB, Acc0),
+            mv_args(Args, VsA, VsB, Acc)
+    end;
+mv_args([_ | Args], VsA, VsB, Acc) ->
+    mv_args(Args, VsA, VsB, Acc);
+mv_args([], _VsA, _VsB, Acc) ->
+    Acc.
+
 merge_fragility(FragileA, FragileB) ->
     cerl_sets:union(FragileA, FragileB).
 
@@ -2361,7 +2402,7 @@ assert_not_fragile(Lit, #vst{}) ->
 %%%
 
 bif_types(Op, Ss, Vst) ->
-    Args = [get_term_type(Arg, Vst) || Arg <- Ss],
+    Args = [normalize(get_term_type(Arg, Vst)) || Arg <- Ss],
     beam_call_types:types(erlang, Op, Args).
 
 call_types({extfunc,M,F,A}, A, Vst) ->
@@ -2376,7 +2417,8 @@ get_call_args(Arity, Vst) ->
 get_call_args_1(Arity, Arity, _) ->
     [];
 get_call_args_1(N, Arity, Vst) when N < Arity ->
-    [get_movable_term_type({x,N}, Vst) | get_call_args_1(N + 1, Arity, Vst)].
+    ArgType = normalize(get_movable_term_type({x,N}, Vst)),
+    [ArgType | get_call_args_1(N + 1, Arity, Vst)].
 
 check_limit({x,X}=Src) when is_integer(X) ->
     if
diff --git a/lib/compiler/src/cerl_sets.erl b/lib/compiler/src/cerl_sets.erl
index f489baf238..84e488fc55 100644
--- a/lib/compiler/src/cerl_sets.erl
+++ b/lib/compiler/src/cerl_sets.erl
@@ -153,14 +153,21 @@ intersection1(S1, []) -> S1.
       Set1 :: set(Element),
       Set2 :: set(Element).
 
-is_disjoint(S1, S2) when map_size(S1) < map_size(S2) ->
-    fold(fun (_, false) -> false;
-	     (E, true) -> not is_element(E, S2)
-	 end, true, S1);
+is_disjoint(S1, S2) when map_size(S1) > map_size(S2) ->
+    is_disjoint_1(S1, maps:iterator(S2));
 is_disjoint(S1, S2) ->
-    fold(fun (_, false) -> false;
-	     (E, true) -> not is_element(E, S1)
-	 end, true, S2).
+    is_disjoint_1(S2, maps:iterator(S1)).
+
+is_disjoint_1(Set, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            case Set of
+                #{K := _} -> false;
+                #{} -> is_disjoint_1(Set, NextIter)
+            end;
+        none ->
+            true
+    end.
 
 %% subtract(Set1, Set2) -> Set.
 %%  Return all and only the elements of Set1 which are not also in
@@ -180,8 +187,21 @@ subtract(S1, S2) ->
       Set1 :: set(Element),
       Set2 :: set(Element).
 
+is_subset(S1, S2) when map_size(S1) > map_size(S2) ->
+    false;
 is_subset(S1, S2) ->
-    fold(fun (E, Sub) -> Sub andalso is_element(E, S2) end, true, S1).
+    is_subset_1(S2, maps:iterator(S1)).
+
+is_subset_1(Set, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            case Set of
+                #{K := _} -> is_subset_1(Set, NextIter);
+                #{} -> false
+            end;
+        none ->
+            true
+    end.
 
 %% fold(Fun, Accumulator, Set) -> Accumulator.
 %%  Fold function Fun over all elements in Set and return Accumulator.
@@ -193,8 +213,16 @@ is_subset(S1, S2) ->
       AccIn :: Acc,
       AccOut :: Acc.
 
-fold(F, Init, D) ->
-    lists:foldl(fun(E,Acc) -> F(E,Acc) end,Init,maps:keys(D)).
+fold(Fun, Init, Set) ->
+    fold_1(Fun, Init, maps:iterator(Set)).
+
+fold_1(Fun, Acc, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            fold_1(Fun, Fun(K,Acc), NextIter);
+        none ->
+            Acc
+    end.
 
 %% filter(Fun, Set) -> Set.
 %%  Filter Set with Fun.
@@ -203,5 +231,18 @@ fold(F, Init, D) ->
       Set1 :: set(Element),
       Set2 :: set(Element).
 
-filter(F, D) ->
-    maps:filter(fun(K,_) -> F(K) end, D).
+filter(Fun, Set) ->
+    maps:from_list(filter_1(Fun, maps:iterator(Set))).
+
+filter_1(Fun, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            case Fun(K) of
+                true ->
+                    [{K,ok} | filter_1(Fun, NextIter)];
+                false ->
+                    filter_1(Fun, NextIter)
+            end;
+        none ->
+            []
+    end.
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 42f9e8b902..21d67f5649 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -271,8 +271,11 @@ expand_opt(r22, Os) ->
     [no_shared_fun_wrappers, no_swap | Os];
 expand_opt({debug_info_key,_}=O, Os) ->
     [encrypt_debug_info,O|Os];
-expand_opt(no_type_opt, Os) ->
-    [no_ssa_opt_type_start,
+expand_opt(no_type_opt=O, Os) ->
+    %% Be sure to keep the no_type_opt option so that it will
+    %% be recorded in the BEAM file, allowing the test suites
+    %% to recompile the file with this option.
+    [O,no_ssa_opt_type_start,
      no_ssa_opt_type_continue,
      no_ssa_opt_type_finish | Os];
 expand_opt(O, Os) -> [O|Os].
diff --git a/lib/compiler/test/Makefile b/lib/compiler/test/Makefile
index 2c0767b17f..44cff40128 100644
--- a/lib/compiler/test/Makefile
+++ b/lib/compiler/test/Makefile
@@ -110,6 +110,8 @@ NO_MOD_OPT = $(NO_OPT)
 
 NO_SSA_OPT = $(NO_OPT)
 
+NO_TYPE_OPT = $(NO_OPT)
+
 NO_OPT_MODULES= $(NO_OPT:%=%_no_opt_SUITE)
 NO_OPT_ERL_FILES= $(NO_OPT_MODULES:%=%.erl)
 POST_OPT_MODULES= $(NO_OPT:%=%_post_opt_SUITE)
@@ -122,6 +124,8 @@ NO_MOD_OPT_MODULES= $(NO_MOD_OPT:%=%_no_module_opt_SUITE)
 NO_MOD_OPT_ERL_FILES= $(NO_MOD_OPT_MODULES:%=%.erl)
 NO_SSA_OPT_MODULES= $(NO_SSA_OPT:%=%_no_ssa_opt_SUITE)
 NO_SSA_OPT_ERL_FILES= $(NO_SSA_OPT_MODULES:%=%.erl)
+NO_TYPE_OPT_MODULES= $(NO_TYPE_OPT:%=%_no_type_opt_SUITE)
+NO_TYPE_OPT_ERL_FILES= $(NO_TYPE_OPT_MODULES:%=%.erl)
 
 ERL_FILES= $(MODULES:%=%.erl)
 CORE_FILES= $(CORE_MODULES:%=%.core)
@@ -151,7 +155,7 @@ EBIN = .
 # ----------------------------------------------------
 
 make_emakefile: $(NO_OPT_ERL_FILES) $(POST_OPT_ERL_FILES) $(NO_SSA_OPT_ERL_FILES) \
-  $(INLINE_ERL_FILES) $(R21_ERL_FILES) $(NO_MOD_OPT_ERL_FILES)
+  $(INLINE_ERL_FILES) $(R21_ERL_FILES) $(NO_MOD_OPT_ERL_FILES) $(NO_TYPE_OPT_ERL_FILES)
 	$(ERL_TOP)/make/make_emakefile $(ERL_COMPILE_FLAGS) -o$(EBIN) $(MODULES) \
 	> $(EMAKEFILE)
 	$(ERL_TOP)/make/make_emakefile +no_copt +no_postopt \
@@ -170,6 +174,8 @@ make_emakefile: $(NO_OPT_ERL_FILES) $(POST_OPT_ERL_FILES) $(NO_SSA_OPT_ERL_FILES
 	-o$(EBIN) $(NO_MOD_OPT_MODULES) >> $(EMAKEFILE)
 	$(ERL_TOP)/make/make_emakefile +from_core $(ERL_COMPILE_FLAGS) \
 	-o$(EBIN) $(CORE_MODULES) >> $(EMAKEFILE)
+	$(ERL_TOP)/make/make_emakefile +no_type_opt $(ERL_COMPILE_FLAGS) \
+	-o$(EBIN) $(NO_TYPE_OPT_MODULES) >> $(EMAKEFILE)
 
 tests debug opt: make_emakefile
 	erl $(ERL_MAKE_FLAGS) -make
@@ -203,6 +209,10 @@ docs:
 %_no_module_opt_SUITE.erl: %_SUITE.erl
 	sed -e 's;-module($(basename $<));-module($(basename $@));' $< > $@
 
+%_no_type_opt_SUITE.erl: %_SUITE.erl
+	sed -e 's;-module($(basename $<));-module($(basename $@));' $< > $@
+
+
 # ----------------------------------------------------
 # Release Target
 # ---------------------------------------------------- 
@@ -217,7 +227,8 @@ release_tests_spec: make_emakefile
 	$(INSTALL_DATA) $(NO_OPT_ERL_FILES) $(POST_OPT_ERL_FILES) \
 		$(INLINE_ERL_FILES) $(R21_ERL_FILES) \
 		$(NO_MOD_OPT_ERL_FILES) \
-		$(NO_SSA_OPT_ERL_FILES) "$(RELSYSDIR)"
+		$(NO_SSA_OPT_ERL_FILES) \
+	        $(NO_TYPE_OPT_ERL_FILES) "$(RELSYSDIR)"
 	$(INSTALL_DATA) $(CORE_FILES) "$(RELSYSDIR)"
 	for file in $(ERL_DUMMY_FILES); do \
 	    module=`basename $$file .erl`; \
diff --git a/lib/compiler/test/beam_ssa_SUITE.erl b/lib/compiler/test/beam_ssa_SUITE.erl
index 96cc846799..72e55ae9ec 100644
--- a/lib/compiler/test/beam_ssa_SUITE.erl
+++ b/lib/compiler/test/beam_ssa_SUITE.erl
@@ -23,7 +23,7 @@
 	 init_per_group/2,end_per_group/2,
          calls/1,tuple_matching/1,recv/1,maps/1,
          cover_ssa_dead/1,combine_sw/1,share_opt/1,
-         beam_ssa_dead_crash/1]).
+         beam_ssa_dead_crash/1,stack_init/1]).
 
 suite() -> [{ct_hooks,[ts_install_cth]}].
 
@@ -39,7 +39,8 @@ groups() ->
        cover_ssa_dead,
        combine_sw,
        share_opt,
-       beam_ssa_dead_crash
+       beam_ssa_dead_crash,
+       stack_init
       ]}].
 
 init_per_suite(Config) ->
@@ -190,6 +191,15 @@ recv(_Config) ->
     self() ! {[self(),r1],{2,99,<<"data">>}},
     {Parent,r1,<<1:32,2:8,99:8,"data">>} = tricky_recv_4(),
 
+    %% Test tricky_recv_5/0.
+    self() ! 1,
+    a = tricky_recv_5(),
+    self() ! 2,
+    b = tricky_recv_5(),
+
+    %% tricky_recv_6/0 is a compile-time error.
+    tricky_recv_6(),
+
     ok.
 
 sync_wait_mon({Pid, Ref}, Timeout) ->
@@ -295,6 +305,38 @@ tricky_recv_4() ->
 	end,
     id({Pid,R,Request}).
 
+%% beam_ssa_pre_codegen would accidentally create phi nodes on critical edges
+%% when fixing up receives; the call to id/2 can either succeed or land in the
+%% catch block, and we added a phi node to its immediate successor.
+tricky_recv_5() ->
+    try
+        receive
+            X=1 ->
+                id(42),
+                a;
+            X=2 ->
+                b
+        end,
+        case X of
+            1 -> a;
+            2 -> b
+        end
+    catch
+        _:_ -> c
+    end.
+
+%% When fixing tricky_recv_5, we introduced a compiler crash when the common
+%% exit block was ?EXCEPTION_BLOCK and floats were in the picture.
+tricky_recv_6() ->
+    RefA = make_ref(),
+    RefB = make_ref(),
+    receive
+        {RefA, Number} -> Number + 1.0;
+        {RefB, Number} -> Number + 2.0
+    after 0 ->
+        ok
+    end.
+
 maps(_Config) ->
     {'EXIT',{{badmatch,#{}},_}} = (catch maps_1(any)),
     ok.
@@ -443,9 +485,11 @@ do_comb_sw_2(X) ->
     erase(?MODULE).
 
 share_opt(_Config) ->
-    ok = do_share_opt(0).
+    ok = do_share_opt_1(0),
+    ok = do_share_opt_2(),
+    ok.
 
-do_share_opt(A) ->
+do_share_opt_1(A) ->
     %% The compiler would be stuck in an infinite loop in beam_ssa_share.
     case A of
         0 -> a;
@@ -454,6 +498,26 @@ do_share_opt(A) ->
     end,
     receive after 1 -> ok end.
 
+do_share_opt_2() ->
+    ok = sopt_2({[pointtopoint], [{dstaddr,any}]}, ok),
+    ok = sopt_2({[broadcast], [{broadaddr,any}]}, ok),
+    ok = sopt_2({[], []}, ok),
+    ok.
+
+sopt_2({Flags, Opts}, ok) ->
+    Broadcast = lists:member(broadcast, Flags),
+    P2P = lists:member(pointtopoint, Flags),
+    case Opts of
+        %% The following two clauses would be combined to one, silently
+        %% discarding the guard test of the P2P variable.
+        [{broadaddr,_}|Os] when Broadcast ->
+            sopt_2({Flags, Os}, ok);
+        [{dstaddr,_}|Os] when P2P ->
+            sopt_2({Flags, Os}, ok);
+        [] ->
+            ok
+    end.
+    
 beam_ssa_dead_crash(_Config) ->
     not_A_B = do_beam_ssa_dead_crash(id(false), id(true)),
     not_A_not_B = do_beam_ssa_dead_crash(false, false),
@@ -508,6 +572,30 @@ do_beam_ssa_dead_crash(A, B) ->
             end
     end.
 
+stack_init(_Config) ->
+    6 = stack_init(a, #{a => [1,2,3]}),
+    0 = stack_init(missing, #{}),
+    ok.
+
+stack_init(Key, Map) ->
+    %% beam_ssa_codegen would wrongly assume that y(0) would always be
+    %% initialized by the `get_map_elements` instruction that follows, and
+    %% would set up the stack frame using an `allocate` instruction and
+    %% would not generate an `init` instruction to initialize y(0).
+    Res = case Map of
+              #{Key := Elements} ->
+                  %% Elements will be assigned to y(0) if the key Key exists.
+                  lists:foldl(fun(El, Acc) ->
+                                      Acc + El
+                              end, 0, Elements);
+              #{} ->
+                  %% y(0) will be left uninitialized when the key is not
+                  %% present in the map.
+                  0
+          end,
+    %% y(0) would be uninitialized here if the key was not present in the map
+    %% (if the second clause was executed).
+    id(Res).
 
 %% The identity function.
 id(I) -> I.
diff --git a/lib/compiler/test/beam_type_SUITE.erl b/lib/compiler/test/beam_type_SUITE.erl
index 076a604aa4..a99dee48aa 100644
--- a/lib/compiler/test/beam_type_SUITE.erl
+++ b/lib/compiler/test/beam_type_SUITE.erl
@@ -24,7 +24,8 @@
 	 integers/1,numbers/1,coverage/1,booleans/1,setelement/1,
 	 cons/1,tuple/1,record_float/1,binary_float/1,float_compare/1,
 	 arity_checks/1,elixir_binaries/1,find_best/1,
-         test_size/1,cover_lists_functions/1,list_append/1,bad_binary_unit/1]).
+         test_size/1,cover_lists_functions/1,list_append/1,bad_binary_unit/1,
+         none_argument/1]).
 
 suite() -> [{ct_hooks,[ts_install_cth]}].
 
@@ -49,7 +50,8 @@ groups() ->
        test_size,
        cover_lists_functions,
        list_append,
-       bad_binary_unit
+       bad_binary_unit,
+       none_argument
       ]}].
 
 init_per_suite(Config) ->
@@ -518,5 +520,24 @@ bad_binary_unit(_Config) ->
     false = is_binary(Bitstring),
     ok.
 
+%% ERL-1013: The compiler would crash during the type optimization pass.
+none_argument(_Config) ->
+    Binary = id(<<3:16, 42>>),
+    error = id(case Binary of
+                   <<Len:16, Body/binary>> when length(Body) == Len - 2 ->
+                       %% The type for Body will be none. It means
+                       %% that this clause will never match and that
+                       %% uncompress/1 will never be called.
+                       uncompress(Body);
+                   _ ->
+                       error
+               end),
+    ok.
+
+uncompress(CompressedBinary) ->
+    %% The type for CompressedBinary is none, which beam_ssa_type
+    %% did not handle properly.
+    zlib:uncompress(CompressedBinary).
+
 id(I) ->
     I.
diff --git a/lib/compiler/test/beam_types_SUITE.erl b/lib/compiler/test/beam_types_SUITE.erl
index 297bd4026e..8e71a716cd 100644
--- a/lib/compiler/test/beam_types_SUITE.erl
+++ b/lib/compiler/test/beam_types_SUITE.erl
@@ -25,18 +25,32 @@
 -export([all/0, suite/0, groups/0,
          init_per_suite/1, end_per_suite/1]).
 
--export([consistency/1, commutativity/1,
-         binary_consistency/1, integer_consistency/1]).
+-export([absorption/1,
+         associativity/1,
+         commutativity/1,
+         idempotence/1,
+         identity/1]).
+
+-export([binary_absorption/1,
+         integer_absorption/1,
+         integer_associativity/1]).
 
 suite() ->
     [{ct_hooks,[ts_install_cth]}].
 
 all() ->
-    [{group,property_tests}].
+    [{group,property_tests},
+     binary_absorption,
+     integer_absorption,
+     integer_associativity].
 
 groups() ->
-    [{property_tests,[], [consistency, commutativity,
-                          binary_consistency, integer_consistency]}].
+    [{property_tests,[parallel],
+      [absorption,
+       associativity,
+       commutativity,
+       idempotence,
+       identity]}].
 
 init_per_suite(Config) ->
     ct_property_test:init_per_suite(Config).
@@ -44,15 +58,27 @@ init_per_suite(Config) ->
 end_per_suite(Config) ->
     Config.
 
-consistency(Config) when is_list(Config) ->
-    %% manual test: proper:quickcheck(beam_types_prop:consistency()).
-    true = ct_property_test:quickcheck(beam_types_prop:consistency(), Config).
+absorption(Config) when is_list(Config) ->
+    %% manual test: proper:quickcheck(beam_types_prop:absorption()).
+    true = ct_property_test:quickcheck(beam_types_prop:absorption(), Config).
+
+associativity(Config) when is_list(Config) ->
+    %% manual test: proper:quickcheck(beam_types_prop:associativity()).
+    true = ct_property_test:quickcheck(beam_types_prop:associativity(), Config).
 
 commutativity(Config) when is_list(Config) ->
     %% manual test: proper:quickcheck(beam_types_prop:commutativity()).
     true = ct_property_test:quickcheck(beam_types_prop:commutativity(), Config).
 
-binary_consistency(Config) when is_list(Config) ->
+idempotence(Config) when is_list(Config) ->
+    %% manual test: proper:quickcheck(beam_types_prop:idempotence()).
+    true = ct_property_test:quickcheck(beam_types_prop:idempotence(), Config).
+
+identity(Config) when is_list(Config) ->
+    %% manual test: proper:quickcheck(beam_types_prop:identity()).
+    true = ct_property_test:quickcheck(beam_types_prop:identity(), Config).
+
+binary_absorption(Config) when is_list(Config) ->
     %% These binaries should meet into {binary,12} as that's the best common
     %% unit for both types.
     A = #t_bitstring{unit=4},
@@ -66,15 +92,33 @@ binary_consistency(Config) when is_list(Config) ->
 
     ok.
 
-integer_consistency(Config) when is_list(Config) ->
-    %% Integers that don't overlap fully should never meet.
-    A = #t_integer{elements={3,5}},
-    B = #t_integer{elements={4,6}},
+integer_absorption(Config) when is_list(Config) ->
+    %% Integers that don't overlap at all should never meet.
+    A = #t_integer{elements={2,3}},
+    B = #t_integer{elements={4,5}},
 
     none = beam_types:meet(A, B),
-    #t_integer{elements={3,6}} = beam_types:join(A, B),
+    #t_integer{elements={2,5}} = beam_types:join(A, B),
 
     A = beam_types:meet(A, beam_types:join(A, B)),
     A = beam_types:join(A, beam_types:meet(A, B)),
 
     ok.
+
+integer_associativity(Config) when is_list(Config) ->
+    A = #t_integer{elements={3,5}},
+    B = #t_integer{elements={4,6}},
+    C = #t_integer{elements={5,5}},
+
+    %% a ∨ (b ∨ c) = (a ∨ b) ∨ c,
+    LHS_Join = beam_types:join(A, beam_types:join(B, C)),
+    RHS_Join = beam_types:join(beam_types:join(A, B), C),
+    #t_integer{elements={3,6}} = LHS_Join = RHS_Join,
+
+    %% a ∧ (b ∧ c) = (a ∧ b) ∧ c.
+    LHS_Meet = beam_types:meet(A, beam_types:meet(B, C)),
+    RHS_Meet = beam_types:meet(beam_types:meet(A, B), C),
+    #t_integer{elements={5,5}} = LHS_Meet = RHS_Meet,
+
+    ok.
+
diff --git a/lib/compiler/test/beam_validator_SUITE.erl b/lib/compiler/test/beam_validator_SUITE.erl
index 35dda9cc01..685e1a95a7 100644
--- a/lib/compiler/test/beam_validator_SUITE.erl
+++ b/lib/compiler/test/beam_validator_SUITE.erl
@@ -35,7 +35,7 @@
 	 map_field_lists/1,cover_bin_opt/1,
 	 val_dsetel/1,bad_tuples/1,bad_try_catch_nesting/1,
          receive_stacked/1,aliased_types/1,type_conflict/1,
-         infer_on_eq/1,infer_dead_value/1]).
+         infer_on_eq/1,infer_dead_value/1,infer_on_ne/1]).
 
 -include_lib("common_test/include/ct.hrl").
 
@@ -65,7 +65,7 @@ groups() ->
        map_field_lists,cover_bin_opt,val_dsetel,
        bad_tuples,bad_try_catch_nesting,
        receive_stacked,aliased_types,type_conflict,
-       infer_on_eq,infer_dead_value]}].
+       infer_on_eq,infer_dead_value,infer_on_ne]}].
 
 init_per_suite(Config) ->
     test_lib:recompile(?MODULE),
@@ -520,9 +520,9 @@ bad_tuples(Config) ->
      {{bad_tuples,long,2},
       {{put,{atom,too_long}},8,not_building_a_tuple}},
      {{bad_tuples,self_referential,1},
-      {{put,{x,1}},7,{tuple_in_progress,{x,1}}}},
+      {{put,{x,1}},7,{unfinished_tuple,{x,1}}}},
      {{bad_tuples,short,1},
-      {{move,{x,1},{x,0}},7,{tuple_in_progress,{x,1}}}}] = Errors,
+      {{move,{x,1},{x,0}},7,{unfinished_tuple,{x,1}}}}] = Errors,
 
     ok.
 
@@ -681,11 +681,16 @@ infer_on_eq_4(T) ->
 
 %% ERIERL-348; types were inferred for dead values, causing validation to fail.
 
+-record(idv, {key}).
+
 infer_dead_value(Config) when is_list(Config) ->
     a = idv_1({a, b, c, d, e, f, g}, {0, 0, 0, 0, 0, 0, 0}),
     b = idv_1({a, b, c, d, 0, 0, 0}, {a, b, c, d, 0, 0, 0}),
     c = idv_1({0, 0, 0, 0, 0, f, g}, {0, 0, 0, 0, 0, f, g}),
     error = idv_1(gurka, gaffel),
+
+    ok = idv_2(id(#idv{})),
+
     ok.
 
 idv_1({_A, _B, _C, _D, _E, _F, _G},
@@ -700,6 +705,42 @@ idv_1({_A, _B, _C, _D, _E, F, G},
 idv_1(_A, _B) ->
     error.
 
+%% ERL-998; type inference for select_val (#b_switch{}) was more clever than
+%% that for is_ne_exact (#b_br{}), sometimes failing validation when the type
+%% optimization pass acted on the former and the validator got the latter.
+
+-record(ion, {state}).
+
+infer_on_ne(Config) when is_list(Config) ->
+    #ion{state = closing} = ion_1(#ion{ state = id(open) }),
+    #ion{state = closing} = ion_close(#ion{ state = open }),
+    ok.
+
+ion_1(State = #ion{state = open}) -> ion_2(State);
+ion_1(State = #ion{state = closing}) -> ion_2(State).
+
+ion_2(State = #ion{state = open}) -> ion_close(State);
+ion_2(#ion{state = closing}) -> ok.
+
+ion_close(State = #ion{}) -> State#ion{state = closing}.
+
+%% ERL-995: The first solution to ERIERL-348 was incomplete and caused
+%% validation to fail when living values depended on delayed type inference on
+%% "dead" values.
+
+idv_2(State) ->
+    Flag = (State#idv.key == undefined),
+    case id(gurka) of
+        {_} -> id([Flag]);
+        _ -> ok
+    end,
+    if
+        Flag -> idv_called_once(State);
+        true -> ok
+    end.
+
+idv_called_once(_State) -> ok.
+
 %%%-------------------------------------------------------------------------
 
 transform_remove(Remove, Module) ->
diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl
index d97f49c56e..145a50f4ad 100644
--- a/lib/compiler/test/bs_match_SUITE.erl
+++ b/lib/compiler/test/bs_match_SUITE.erl
@@ -44,7 +44,8 @@
          beam_bsm/1,guard/1,is_ascii/1,non_opt_eq/1,
          expression_before_match/1,erl_689/1,restore_on_call/1,
          restore_after_catch/1,matches_on_parameter/1,big_positions/1,
-         matching_meets_apply/1,bs_start_match2_defs/1]).
+         matching_meets_apply/1,bs_start_match2_defs/1,
+         exceptions_after_match_failure/1]).
 
 -export([coverage_id/1,coverage_external_ignore/2]).
 
@@ -80,7 +81,8 @@ groups() ->
        beam_bsm,guard,is_ascii,non_opt_eq,
        expression_before_match,erl_689,restore_on_call,
        matches_on_parameter,big_positions,
-       matching_meets_apply,bs_start_match2_defs]}].
+       matching_meets_apply,bs_start_match2_defs,
+       exceptions_after_match_failure]}].
 
 
 init_per_suite(Config) ->
@@ -2005,4 +2007,17 @@ do_matching_meets_apply(_Bin, {Handler, State}) ->
     %% Another case of the above.
     Handler:abs(State).
 
+%% Exception handling was broken on the failure path of bs_start_match as
+%% beam_ssa_bsm accidentally cloned and renamed the ?BADARG_BLOCK.
+exceptions_after_match_failure(_Config) ->
+    {'EXIT', {badarith, _}} = (catch do_exceptions_after_match_failure(atom)),
+    ok = do_exceptions_after_match_failure(<<0, 1, "gurka">>),
+    ok = do_exceptions_after_match_failure(2.0).
+
+do_exceptions_after_match_failure(<<_A, _B, "gurka">>) ->
+    ok;
+do_exceptions_after_match_failure(Other) ->
+    Other / 2.0,
+    ok.
+
 id(I) -> I.
diff --git a/lib/compiler/test/match_SUITE.erl b/lib/compiler/test/match_SUITE.erl
index aac9de278d..bc74ec4984 100644
--- a/lib/compiler/test/match_SUITE.erl
+++ b/lib/compiler/test/match_SUITE.erl
@@ -25,7 +25,8 @@
 	 match_in_call/1,untuplify/1,shortcut_boolean/1,letify_guard/1,
 	 selectify/1,deselectify/1,underscore/1,match_map/1,map_vars_used/1,
 	 coverage/1,grab_bag/1,literal_binary/1,
-         unary_op/1,eq_types/1,match_after_return/1,match_right_tuple/1]).
+         unary_op/1,eq_types/1,match_after_return/1,match_right_tuple/1,
+         tuple_size_in_try/1]).
 	 
 -include_lib("common_test/include/ct.hrl").
 
@@ -41,7 +42,8 @@ groups() ->
        shortcut_boolean,letify_guard,selectify,deselectify,
        underscore,match_map,map_vars_used,coverage,
        grab_bag,literal_binary,unary_op,eq_types,
-       match_after_return,match_right_tuple]}].
+       match_after_return,match_right_tuple,
+       tuple_size_in_try]}].
 
 
 init_per_suite(Config) ->
@@ -922,4 +924,19 @@ match_right_tuple_1(T) ->
 
 force_succ_regs(_A, B) -> B.
 
+tuple_size_in_try(Config) when is_list(Config) ->
+    %% The tuple_size optimization was applied outside of guards, causing
+    %% either the emulator or compiler to crash.
+    ok = tsit(gurka),
+    ok = tsit(gaffel).
+
+tsit(A) ->
+    try
+        id(ignored),
+        1 = tuple_size(A),
+        error
+    catch
+        _:_ -> ok
+    end.
+
 id(I) -> I.
diff --git a/lib/compiler/test/property_test/beam_types_prop.erl b/lib/compiler/test/property_test/beam_types_prop.erl
index 9c103d3886..8199d1bd5a 100644
--- a/lib/compiler/test/property_test/beam_types_prop.erl
+++ b/lib/compiler/test/property_test/beam_types_prop.erl
@@ -22,8 +22,8 @@
 
 -compile([export_all, nowarn_export_all]).
 
-%% This module has only supports proper, as we don't have an eqc license to
-%% test with.
+%% This module only supports proper, as we don't have an eqc license to test
+%% with.
 
 -proptest([proper]).
 
@@ -34,33 +34,99 @@
 -include_lib("proper/include/proper.hrl").
 -define(MOD_eqc,proper).
 
-consistency() ->
+%% The default repetitions of 100 is a bit too low to reliably cover all type
+%% combinations, so we crank it up a bit.
+-define(REPETITIONS, 1000).
+
+absorption() ->
+    numtests(?REPETITIONS, absorption_1()).
+
+absorption_1() ->
     ?FORALL({TypeA, TypeB},
             ?LET(TypeA, type(),
                  ?LET(TypeB, type(), {TypeA, TypeB})),
-            consistency_check(TypeA, TypeB)).
+            absorption_check(TypeA, TypeB)).
+
+absorption_check(A, B) ->
+    %% a ∨ (a ∧ b) = a,
+    A = join(A, meet(A, B)),
+
+    %% a ∧ (a ∨ b) = a.
+    A = meet(A, join(A, B)),
 
-consistency_check(A, B) ->
-    consistency_check_1(A, B),
-    consistency_check_1(B, A),
     true.
 
-consistency_check_1(A, B) ->
-    A = beam_types:meet(A, beam_types:join(A, B)),
-    A = beam_types:join(A, beam_types:meet(A, B)),
-    ok.
+associativity() ->
+    numtests(?REPETITIONS, associativity_1()).
+
+associativity_1() ->
+    ?FORALL({TypeA, TypeB, TypeC},
+            ?LET(TypeA, type(),
+                 ?LET(TypeB, type(),
+                      ?LET(TypeC, type(), {TypeA, TypeB, TypeC}))),
+            associativity_check(TypeA, TypeB, TypeC)).
+
+associativity_check(A, B, C) ->
+    %% a ∨ (b ∨ c) = (a ∨ b) ∨ c,
+    LHS_Join = join(A, join(B, C)),
+    RHS_Join = join(join(A, B), C),
+    LHS_Join = RHS_Join,
+
+    %% a ∧ (b ∧ c) = (a ∧ b) ∧ c.
+    LHS_Meet = meet(A, meet(B, C)),
+    RHS_Meet = meet(meet(A, B), C),
+    LHS_Meet = RHS_Meet,
+
+    true.
 
 commutativity() ->
+    numtests(?REPETITIONS, commutativity_1()).
+
+commutativity_1() ->
     ?FORALL({TypeA, TypeB},
             ?LET(TypeA, type(),
                  ?LET(TypeB, type(), {TypeA, TypeB})),
-            commutativity_check(TypeA, TypeB)).
+             commutativity_check(TypeA, TypeB)).
 
 commutativity_check(A, B) ->
-    true = beam_types:meet(A, B) =:= beam_types:meet(B, A),
-    true = beam_types:join(A, B) =:= beam_types:join(B, A),
+    %% a ∨ b = b ∨ a,
+    true = join(A, B) =:= join(B, A),
+
+    %% a ∧ b = b ∧ a.
+    true = meet(A, B) =:= meet(B, A),
+
+    true.
+
+idempotence() ->
+    numtests(?REPETITIONS, idempotence_1()).
+
+idempotence_1() ->
+    ?FORALL(Type, type(), idempotence_check(Type)).
+
+idempotence_check(Type) ->
+    %% a ∨ a = a,
+    Type = join(Type, Type),
+
+    %% a ∧ a = a.
+    Type = meet(Type, Type),
+
+    true.
+
+identity() ->
+    ?FORALL(Type, type(), identity_check(Type)).
+
+identity_check(Type) ->
+    %% a ∨ [bottom element] = a,
+    Type = join(Type, none),
+
+    %% a ∧ [top element] = a.
+    Type = meet(Type, any),
+
     true.
 
+meet(A, B) -> beam_types:meet(A, B).
+join(A, B) -> beam_types:join(A, B).
+
 %%%
 %%% Generators
 %%%
@@ -75,7 +141,10 @@ type(Depth) ->
               other_types()).
 
 other_types() ->
-    [any, gen_atom(), gen_binary(), none].
+    [any,
+     gen_atom(),
+     gen_binary(),
+     none].
 
 list_types() ->
     [cons, list, nil].
@@ -83,8 +152,8 @@ list_types() ->
 numerical_types() ->
     [gen_integer(), float, number].
 
-nested_types(Depth) when Depth >= 3 -> [];
-nested_types(Depth) -> [#t_map{}, gen_tuple(Depth + 1)].
+nested_types(Depth) when Depth >= 3 -> [none];
+nested_types(Depth) -> [#t_map{}, gen_union(Depth + 1), gen_tuple(Depth + 1)].
 
 gen_atom() ->
     ?LET(Size, range(0, ?ATOM_SET_SIZE),
@@ -92,35 +161,54 @@ gen_atom() ->
              0 ->
                  #t_atom{};
              _ ->
-                 ?LET(Set, sized_list(Size, atom()),
+                 ?LET(Set, sized_list(Size, gen_atom_val()),
                       begin
                           #t_atom{elements=ordsets:from_list(Set)}
                       end)
          end).
 
+gen_atom_val() ->
+    ?LET(N, range($0, $~), list_to_atom([N])).
+
 gen_binary() ->
-    ?SHRINK(#t_bitstring{unit=range(1, 128)},
-            [#t_bitstring{unit=[1, 2, 3, 5, 7, 8, 16, 32, 64]}]).
+    ?SHRINK(#t_bitstring{unit=range(1, 128)}, [#t_bitstring{unit=1}]).
 
 gen_integer() ->
     oneof([gen_integer_bounded(), #t_integer{}]).
 
 gen_integer_bounded() ->
-    ?LET(A, integer(),
-         ?LET(B, integer(),
-              begin
-                  #t_integer{elements={min(A,B), max(A,B)}}
-              end)).
+    ?LET({A, B}, {integer(), integer()},
+         begin
+             #t_integer{elements={min(A,B), max(A,B)}}
+         end).
 
 gen_tuple(Depth) ->
     ?SIZED(Size,
-           ?LET(Exact, oneof([true, false]),
-                ?LET(Elements, gen_tuple_elements(Size, Depth),
-                     begin
-                         #t_tuple{exact=Exact,
-                                  size=Size,
-                                  elements=Elements}
-                     end))).
+           ?LET({Exact, Elements}, {boolean(), gen_tuple_elements(Size, Depth)},
+                begin
+                    #t_tuple{exact=Exact,
+                             size=Size,
+                             elements=Elements}
+                end)).
+
+gen_union(Depth) ->
+    ?LAZY(oneof([gen_wide_union(Depth), gen_tuple_union(Depth)])).
+
+gen_wide_union(Depth) ->
+    ?LET({A, B, C, D}, {oneof(nested_types(Depth)),
+                        oneof(numerical_types()),
+                        oneof(list_types()),
+                        oneof(other_types())},
+          begin
+              T0 = join(A, B),
+              T1 = join(T0, C),
+              join(T1, D)
+          end).
+
+gen_tuple_union(Depth) ->
+    ?SIZED(Size,
+           ?LET(Tuples, sized_list(Size, gen_tuple(Depth)),
+                lists:foldl(fun join/2, none, Tuples))).
 
 gen_tuple_elements(Size, Depth) ->
     ?LET(Types, sized_list(rand:uniform(Size div 4 + 1), gen_element(Depth)),
diff --git a/lib/compiler/test/test_lib.erl b/lib/compiler/test/test_lib.erl
index a468482acb..4b68e663cd 100644
--- a/lib/compiler/test/test_lib.erl
+++ b/lib/compiler/test/test_lib.erl
@@ -99,7 +99,8 @@ get_data_dir(Config) ->
     Data2 = re:replace(Data1, "_post_opt_SUITE", "_SUITE", Opts),
     Data3 = re:replace(Data2, "_inline_SUITE", "_SUITE", Opts),
     Data4 = re:replace(Data3, "_r21_SUITE", "_SUITE", Opts),
-    Data = re:replace(Data4, "_no_module_opt_SUITE", "_SUITE", Opts),
+    Data5 = re:replace(Data4, "_no_module_opt_SUITE", "_SUITE", Opts),
+    Data = re:replace(Data5, "_no_type_opt_SUITE", "_SUITE", Opts),
     re:replace(Data, "_no_ssa_opt_SUITE", "_SUITE", Opts).
 
 is_cloned_mod(Mod) ->
diff --git a/lib/compiler/vsn.mk b/lib/compiler/vsn.mk
index 508bbc902c..7192ddca15 100644
--- a/lib/compiler/vsn.mk
+++ b/lib/compiler/vsn.mk
@@ -1 +1 @@
-COMPILER_VSN = 7.4.2
+COMPILER_VSN = 7.4.4