1 files changed, 292 insertions, 42 deletions
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl
index 357352269c..c2d5035b19 100644
--- a/lib/compiler/src/beam_ssa_codegen.erl
+++ b/lib/compiler/src/beam_ssa_codegen.erl
@@ -108,7 +108,8 @@ module(#b_module{name=Mod,exports=Es,attributes=Attrs,body=Fs}, _Opts) ->
 -type ssa_register() :: xreg() | yreg() | {'fr',reg_num()} | {'z',reg_num()}.
 
 functions(Forms, AtomMod) ->
-    mapfoldl(fun (F, St) -> function(F, AtomMod, St) end, #cg{lcount=1}, Forms).
+    mapfoldl(fun (F, St) -> function(F, AtomMod, St) end,
+             #cg{lcount=1}, Forms).
 
 function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) ->
     #{func_info:={_,Name,Arity}} = Anno,
@@ -125,8 +126,9 @@ function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) ->
                      ultimate_fail=Ult},
         {Body,St} = cg_fun(Blocks, St5),
         Asm = [{label,Fi},line(Anno),
-               {func_info,AtomMod,{atom,Name},Arity}] ++ Body ++
-            [{label,Ult},if_end],
+               {func_info,AtomMod,{atom,Name},Arity}] ++
+               add_parameter_annos(Body, Anno) ++
+               [{label,Ult},if_end],
         Func = {function,Name,Arity,Entry,Asm},
         {Func,St}
     catch
@@ -150,6 +152,17 @@ assert_badarg_block(Blocks) ->
             ok
     end.
 
+%% add_parameter_annos(Asm, Anno) -> Asm.
+%%  Insert a '%' type_info annotation directly after the entry
+%%  label for every entry in the registers map whose key also has
+%%  an entry in parameter_type_info.
+add_parameter_annos([{label, _}=Entry | Body], Anno) ->
+    Types = maps:get(parameter_type_info, Anno, #{}),
+    Registers = maps:to_list(maps:get(registers, Anno)),
+    Annos = [{'%', {type_info, Reg, map_get(Key, Types)}} ||
+                {Key, Reg} <- Registers, is_map_key(Key, Types)],
+    [Entry | sort(Annos)] ++ Body.
+
 cg_fun(Blocks, St0) ->
     Linear0 = linearize(Blocks),
     St = collect_catch_labels(Linear0, St0),
@@ -218,7 +231,7 @@ need_heap_never(_) -> false.
 
 need_heap_blks([{L,#cg_blk{is=Is0}=Blk0}|Bs], H0, Acc) ->
     {Is1,H1} = need_heap_is(reverse(Is0), H0, []),
-    {Ns,H} = need_heap_terminator(Bs, H1),
+    {Ns,H} = need_heap_terminator(Bs, L, H1),
     Is = Ns ++ Is1,
     Blk = Blk0#cg_blk{is=Is},
     need_heap_blks(Bs, H, [{L,Blk}|Acc]);
@@ -228,6 +241,13 @@ need_heap_blks([], H, Acc) ->
 need_heap_is([#cg_alloc{words=Words}=Alloc0|Is], N, Acc) ->
     Alloc = Alloc0#cg_alloc{words=add_heap_words(N, Words)},
     need_heap_is(Is, #need{}, [Alloc|Acc]);
+need_heap_is([#cg_set{anno=Anno,op=bs_init}=I0|Is], N, Acc) ->
+    Alloc = case need_heap_need(N) of
+                [#cg_alloc{words=Need}] -> alloc(Need);
+                [] -> 0
+            end,
+    I = I0#cg_set{anno=Anno#{alloc=>Alloc}},
+    need_heap_is(Is, #need{}, [I|Acc]);
 need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) ->
     case classify_heap_need(Op, Args) of
         {put,Words} ->
@@ -243,11 +263,31 @@ need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) ->
 need_heap_is([], N, Acc) ->
     {Acc,N}.
 
-need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=Same,fail=Same}}}|_], N) ->
+need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=L,fail=L}}}|_], L, N) ->
+    %% Fallthrough.
     {[],N};
-need_heap_terminator([{_,#cg_blk{}}|_], N) ->
+need_heap_terminator([{_,#cg_blk{is=Is,last=#cg_br{succ=L}}}|_], L, N) ->
+    case need_heap_need(N) of
+        [] ->
+            {[],#need{}};
+        [_|_]=Alloc ->
+            %% If the preceding instructions are a binary construction,
+            %% hoist the allocation and incorporate it into the bs_init
+            %% instruction.
+            case reverse(Is) of
+                [#cg_set{op=succeeded},#cg_set{op=bs_init}|_] ->
+                    {[],N};
+                [#cg_set{op=bs_put}|_] ->
+                    {[],N};
+                _ ->
+                    %% Not binary construction. Must emit an allocation
+                    %% instruction in this block.
+                    {Alloc,#need{}}
+            end
+    end;
+need_heap_terminator([{_,#cg_blk{}}|_], _, N) ->
     {need_heap_need(N),#need{}};
-need_heap_terminator([], H) ->
+need_heap_terminator([], _, H) ->
     {need_heap_need(H),#need{}}.
 
 need_heap_need(#need{h=0,f=0}) -> [];
@@ -286,6 +326,8 @@ classify_heap_need(put_list, _) ->
     {put,2};
 classify_heap_need(put_tuple_arity, [#b_literal{val=Words}]) ->
     {put,Words+1};
+classify_heap_need(put_tuple, Elements) ->
+    {put,length(Elements)+1};
 classify_heap_need({bif,Name}, Args) ->
     case is_gc_bif(Name, Args) of
         false -> neutral;
@@ -313,12 +355,15 @@ classify_heap_need(Name, _Args) ->
 
 classify_heap_need(bs_add) -> gc;
 classify_heap_need(bs_get) -> gc;
+classify_heap_need(bs_get_tail) -> gc;
 classify_heap_need(bs_init) -> gc;
 classify_heap_need(bs_init_writable) -> gc;
 classify_heap_need(bs_match_string) -> gc;
 classify_heap_need(bs_put) -> neutral;
 classify_heap_need(bs_restore) -> neutral;
 classify_heap_need(bs_save) -> neutral;
+classify_heap_need(bs_get_position) -> gc;
+classify_heap_need(bs_set_position) -> neutral;
 classify_heap_need(bs_skip) -> gc;
 classify_heap_need(bs_start_match) -> neutral;
 classify_heap_need(bs_test_tail) -> neutral;
@@ -327,7 +372,6 @@ classify_heap_need(bs_utf8_size) -> neutral;
 classify_heap_need(build_stacktrace) -> gc;
 classify_heap_need(call) -> gc;
 classify_heap_need(catch_end) -> gc;
-classify_heap_need(context_to_binary) -> gc;
 classify_heap_need(copy) -> neutral;
 classify_heap_need(extract) -> gc;
 classify_heap_need(get_hd) -> neutral;
@@ -590,8 +634,7 @@ liveness_successors(Terminator) ->
 liveness_is([#cg_alloc{}=I0|Is], Regs, Live, Acc) ->
     I = I0#cg_alloc{live=num_live(Live, Regs)},
     liveness_is(Is, Regs, Live, [I|Acc]);
-liveness_is([#cg_set{dst=Dst0,args=Args}=I0|Is], Regs, Live0, Acc) ->
-    #b_var{name=Dst} = Dst0,
+liveness_is([#cg_set{dst=Dst,args=Args}=I0|Is], Regs, Live0, Acc) ->
     Live1 = liveness_clobber(I0, Live0, Regs),
     I1 = liveness_yregs_anno(I0, Live1, Regs),
     Live2 = liveness_args(Args, Live1),
@@ -608,7 +651,7 @@ liveness_terminator(#cg_switch{arg=Arg}, Live) ->
 liveness_terminator(#cg_ret{arg=Arg}, Live) ->
     liveness_terminator_1(Arg, Live).
 
-liveness_terminator_1(#b_var{name=V}, Live) ->
+liveness_terminator_1(#b_var{}=V, Live) ->
     ordsets:add_element(V, Live);
 liveness_terminator_1(#b_literal{}, Live) ->
     Live;
@@ -616,7 +659,7 @@ liveness_terminator_1(Reg, Live) ->
     _ = verify_beam_register(Reg),
     ordsets:add_element(Reg, Live).
 
-liveness_args([#b_var{name=V}|As], Live) ->
+liveness_args([#b_var{}=V|As], Live) ->
     liveness_args(As, ordsets:add_element(V, Live));
 liveness_args([#b_remote{mod=Mod,name=Name}|As], Live) ->
     liveness_args([Mod,Name|As], Live);
@@ -639,7 +682,7 @@ liveness_anno(#cg_set{op=Op}=I, Live, Regs) ->
             I
     end.
 
-liveness_yregs_anno(#cg_set{op=Op,dst=#b_var{name=Dst}}=I, Live0, Regs) ->
+liveness_yregs_anno(#cg_set{op=Op,dst=Dst}=I, Live0, Regs) ->
     case need_live_anno(Op) of
         true ->
             Live = ordsets:del_element(Dst, Live0),
@@ -694,6 +737,8 @@ need_live_anno(Op) ->
         {bif,_} -> true;
         bs_get -> true;
         bs_init -> true;
+        bs_get_position -> true;
+        bs_get_tail -> true;
         bs_start_match -> true;
         bs_skip -> true;
         call -> true;
@@ -702,7 +747,16 @@ need_live_anno(Op) ->
     end.
 
 %%%
-%%% Add annotations for defined Y registers.
+%%% Add the following annotations for Y registers:
+%%%
+%%%   def_yregs   An ordset with variables that refer to live Y registers.
+%%%               That is, Y registers that have been killed
+%%%               are not included. This annotation is added to all
+%%%               instructions that require Y registers to be initialized.
+%%%
+%%%   kill_yregs  This annotation is added to call instructions. It is
+%%%               an ordset containing variables referring to Y registers
+%%%               that will no longer be used after the call instruction.
 %%%
 
 defined(Linear, #cg{regs=Regs}) ->
@@ -726,13 +780,13 @@ def_get(L, DefMap) ->
 def_is([#cg_alloc{anno=Anno0}=I0|Is], Regs, Def, Acc) ->
     I = I0#cg_alloc{anno=Anno0#{def_yregs=>Def}},
     def_is(Is, Regs, Def, [I|Acc]);
-def_is([#cg_set{op=kill_try_tag,args=[#b_var{name=Tag}]}=I|Is], Regs, Def0, Acc) ->
+def_is([#cg_set{op=kill_try_tag,args=[#b_var{}=Tag]}=I|Is], Regs, Def0, Acc) ->
     Def = ordsets:del_element(Tag, Def0),
     def_is(Is, Regs, Def, [I|Acc]);
-def_is([#cg_set{op=catch_end,args=[#b_var{name=Tag}|_]}=I|Is], Regs, Def0, Acc) ->
+def_is([#cg_set{op=catch_end,args=[#b_var{}=Tag|_]}=I|Is], Regs, Def0, Acc) ->
     Def = ordsets:del_element(Tag, Def0),
     def_is(Is, Regs, Def, [I|Acc]);
-def_is([#cg_set{anno=Anno0,op=call,dst=#b_var{name=Dst}}=I0|Is],
+def_is([#cg_set{anno=Anno0,op=call,dst=Dst}=I0|Is],
        Regs, Def0, Acc) ->
     #{live_yregs:=LiveYregVars} = Anno0,
     LiveRegs = gb_sets:from_list([maps:get(V, Regs) || V <- LiveYregVars]),
@@ -747,7 +801,7 @@ def_is([#cg_set{anno=Anno0,op=call,dst=#b_var{name=Dst}}=I0|Is],
     Def1 = ordsets:subtract(Def0, Kill),
     Def = def_add_yreg(Dst, Def1, Regs),
     def_is(Is, Regs, Def, [I|Acc]);
-def_is([#cg_set{anno=Anno0,op={bif,Bif},dst=#b_var{name=Dst},args=Args}=I0|Is],
+def_is([#cg_set{anno=Anno0,op={bif,Bif},dst=Dst,args=Args}=I0|Is],
        Regs, Def0, Acc) ->
     Arity = length(Args),
     I = case is_gc_bif(Bif, Args) orelse not erl_bifs:is_safe(erlang, Bif, Arity) of
@@ -758,7 +812,7 @@ def_is([#cg_set{anno=Anno0,op={bif,Bif},dst=#b_var{name=Dst},args=Args}=I0|Is],
         end,
     Def = def_add_yreg(Dst, Def0, Regs),
     def_is(Is, Regs, Def, [I|Acc]);
-def_is([#cg_set{anno=Anno0,dst=#b_var{name=Dst}}=I0|Is], Regs, Def0, Acc) ->
+def_is([#cg_set{anno=Anno0,dst=Dst}=I0|Is], Regs, Def0, Acc) ->
     I = case need_y_init(I0) of
             true ->
                 I0#cg_set{anno=Anno0#{def_yregs=>Def0}};
@@ -793,6 +847,8 @@ def_successors([], _, DefMap) -> DefMap.
 
 need_y_init(#cg_set{anno=#{clobbers:=Clobbers}}) -> Clobbers;
 need_y_init(#cg_set{op=bs_get}) -> true;
+need_y_init(#cg_set{op=bs_get_position}) -> true;
+need_y_init(#cg_set{op=bs_get_tail}) -> true;
 need_y_init(#cg_set{op=bs_init}) -> true;
 need_y_init(#cg_set{op=bs_skip,args=[#b_literal{val=Type}|_]}) ->
     case Type of
@@ -816,13 +872,35 @@ opt_allocate(Linear, #cg{regs=Regs}) ->
 
 opt_allocate_1([{L,#cg_blk{is=[#cg_alloc{stack=Stk}=I0|Is]}=Blk0}|Bs]=Bs0, Regs)
   when is_integer(Stk) ->
-    Yregs = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
-    I = I0#cg_alloc{def_yregs=Yregs},
-    [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+    %% Collect the variables that are initialized by copy
+    %% instructions in this block.
+    case ordsets:from_list(opt_allocate_defs(Is, Regs)) of
+        Yregs when length(Yregs) =:= Stk ->
+            %% Those copy instructions are sufficient to fully
+            %% initialize the stack frame.
+            I = I0#cg_alloc{def_yregs=Yregs},
+            [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+        Yregs0 ->
+            %% Determine a conservative approximation of the Y
+            %% registers that are guaranteed to be initialized by all
+            %% successors of this block, and to it add the variables
+            %% initialized by copy instructions in this block.
+            Yregs1 = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
+            Yregs = ordsets:union(Yregs0, Yregs1),
+            I = I0#cg_alloc{def_yregs=Yregs},
+            [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)]
+    end;
 opt_allocate_1([B|Bs], Regs) ->
     [B|opt_allocate_1(Bs, Regs)];
 opt_allocate_1([], _) -> [].
 
+opt_allocate_defs(Is, Regs) ->
+    %% Only the leading run of copies into Y registers counts.
+    IsYregCopy = fun(#cg_set{op=copy,dst=Dst}) -> is_yreg(Dst, Regs);
+                    (_) -> false
+                 end,
+    [Dst || #cg_set{dst=Dst} <- lists:takewhile(IsYregCopy, Is)].
+
 opt_alloc_def([{L,#cg_blk{is=Is,last=Last}}|Bs], Ws0, Def0) ->
     case gb_sets:is_member(L, Ws0) of
         false ->
@@ -993,8 +1071,8 @@ cg_block([#cg_set{op={bif,Name},dst=Dst0,args=Args0}]=Is0, {Dst0,Fail}, St0) ->
         {z,_} ->
             %% The result of the BIF call will only be used once. Convert to
             %% a test instruction.
-            Test = bif_to_test(Name, Args, ensure_label(Fail, St0)),
-            {Test,St0};
+            {Test,St1} = bif_to_test(Name, Args, ensure_label(Fail, St0), St0),
+            {Test,St1};
         _ ->
             %% Must explicitly call the BIF since the result will be used
             %% more than once.
@@ -1021,12 +1099,13 @@ cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I,
           #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) ->
     Fail = bif_fail(Fail0),
     Line = line(Anno),
+    Alloc = map_get(alloc, Anno),
     [#b_literal{val=Kind}|Args1] = Args0,
     case Kind of
         new ->
             [Dst,Size,{integer,Unit}] = beam_args([Dst0|Args1], St),
             Live = get_live(I),
-            {[Line|cg_bs_init(Dst, Size, Unit, Live, Fail)],St};
+            {[Line|cg_bs_init(Dst, Size, Alloc, Unit, Live, Fail)],St};
         private_append ->
             [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St),
             Flags = {field_flags,[]},
@@ -1036,17 +1115,23 @@ cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I,
             [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St),
             Flags = {field_flags,[]},
             Live = get_live(I),
-            Is = [Line,{bs_append,Fail,Bits,0,Live,Unit,Src,Flags,Dst}],
+            Is = [Line,{bs_append,Fail,Bits,Alloc,Live,Unit,Src,Flags,Dst}],
             {Is,St}
     end;
 cg_block([#cg_set{anno=Anno,op=bs_start_match,dst=Ctx0,args=[Bin0]}=I,
           #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
-    #{num_slots:=Slots} = Anno,
     [Dst,Bin1] = beam_args([Ctx0,Bin0], St),
     {Bin,Pre} = force_reg(Bin1, Dst),
     Live = get_live(I),
-    Is = Pre ++ [{test,bs_start_match2,Fail,Live,[Bin,Slots],Dst}],
-    {Is,St};
+    %% num_slots is only set when using the old instructions.
+    case maps:find(num_slots, Anno) of
+        {ok, Slots} ->
+            Is = Pre ++ [{test,bs_start_match2,Fail,Live,[Bin,Slots],Dst}],
+            {Is,St};
+        error ->
+            Is = Pre ++ [{test,bs_start_match3,Fail,Live,[Bin],Dst}],
+            {Is,St}
+    end;
 cg_block([#cg_set{op=bs_get}=Set,
           #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
     {cg_bs_get(Fail, Set, St),St};
@@ -1178,18 +1263,119 @@ cg_copy_1([#cg_set{dst=Dst0,args=Args}|T], St) ->
     end;
 cg_copy_1([], _St) -> [].
 
+-define(IS_LITERAL(Val), (Val =:= nil orelse
+                          element(1, Val) =:= integer orelse
+                          element(1, Val) =:= float orelse
+                          element(1, Val) =:= atom orelse
+                          element(1, Val) =:= literal)).
+
+%% 'or': if V1 is not 'false' skip to the new label; else V2 must be 'true'.
+bif_to_test('or', [V1,V2], {f,Lbl}=Fail, St0) when Lbl =/= 0 ->
+    {Succ,St} = new_label(St0),
+    Skip = {test,is_eq_exact,{f,Succ},[V1,{atom,false}]},
+    {[Skip,{test,is_eq_exact,Fail,[V2,{atom,true}]},{label,Succ}],St};
+bif_to_test(Op, Args, Fail, St) ->
+    {bif_to_test(Op, Args, Fail),St}.
+
+bif_to_test('and', [V1,V2], Fail) ->
+    [{test,is_eq_exact,Fail,[V1,{atom,true}]},
+     {test,is_eq_exact,Fail,[V2,{atom,true}]}];
 bif_to_test('not', [Var], Fail) ->
     [{test,is_eq_exact,Fail,[Var,{atom,false}]}];
 bif_to_test(Name, Args, Fail) ->
-    [beam_utils:bif_to_test(Name, Args, Fail)].
+    [bif_to_test_1(Name, Args, Fail)].
+
+%% bif_to_test_1(Bif, Args, Fail) -> {test,Test,Fail,TestArgs}.
+%%  Translate a call to a guard BIF into the corresponding test
+%%  instruction, passing Fail through unchanged. There is no
+%%  catch-all clause, so a BIF (or arity) that is not listed here
+%%  raises a function_clause exception.
+%%
+%%  The clauses are grouped as type tests, ordering comparisons,
+%%  and equality comparisons.
+%%
+%%  Type tests: only is_bitstring/1 and is_function/2 map to a test
+%%  whose name differs from the name of the BIF.
+bif_to_test_1(is_bitstring, [_]=Args, Fail) ->
+    {test,is_bitstr,Fail,Args};
+bif_to_test_1(is_function, [_,_]=Args, Fail) ->
+    {test,is_function2,Fail,Args};
+bif_to_test_1(Test, [_]=Args, Fail)
+  when Test =:= is_atom; Test =:= is_boolean;
+       Test =:= is_binary; Test =:= is_float;
+       Test =:= is_function; Test =:= is_integer;
+       Test =:= is_list; Test =:= is_map;
+       Test =:= is_number; Test =:= is_pid;
+       Test =:= is_port; Test =:= is_reference;
+       Test =:= is_tuple ->
+    {test,Test,Fail,Args};
+
+%% Ordering: '=<' and '>' are expressed as is_ge and is_lt with the
+%% operands swapped.
+bif_to_test_1('<', [_,_]=Args, Fail) ->
+    {test,is_lt,Fail,Args};
+bif_to_test_1('>=', [_,_]=Args, Fail) ->
+    {test,is_ge,Fail,Args};
+bif_to_test_1('>', [Lhs,Rhs], Fail) ->
+    {test,is_lt,Fail,[Rhs,Lhs]};
+bif_to_test_1('=<', [Lhs,Rhs], Fail) ->
+    {test,is_ge,Fail,[Rhs,Lhs]};
+
+%% Equality: when the first operand satisfies ?IS_LITERAL, the
+%% operands are swapped so that the literal ends up last.
+bif_to_test_1('==', [Lit,Other], Fail) when ?IS_LITERAL(Lit) ->
+    {test,is_eq,Fail,[Other,Lit]};
+bif_to_test_1('==', [_,_]=Args, Fail) ->
+    {test,is_eq,Fail,Args};
+bif_to_test_1('/=', [Lit,Other], Fail) when ?IS_LITERAL(Lit) ->
+    {test,is_ne,Fail,[Other,Lit]};
+bif_to_test_1('/=', [_,_]=Args, Fail) ->
+    {test,is_ne,Fail,Args};
+bif_to_test_1('=:=', [Lit,Other], Fail) when ?IS_LITERAL(Lit) ->
+    {test,is_eq_exact,Fail,[Other,Lit]};
+bif_to_test_1('=:=', [_,_]=Args, Fail) ->
+    {test,is_eq_exact,Fail,Args};
+bif_to_test_1('=/=', [Lit,Other], Fail) when ?IS_LITERAL(Lit) ->
+    {test,is_ne_exact,Fail,[Other,Lit]};
+bif_to_test_1('=/=', [_,_]=Args, Fail) ->
+    {test,is_ne_exact,Fail,Args}.
 
 opt_call_moves(Is0, Arity) ->
     {Moves0,Is} = splitwith(fun({move,_,_}) -> true;
+                               ({kill,_}) -> true;
                                (_) -> false
                             end, Is0),
     Moves = opt_call_moves_1(Moves0, Arity),
     Moves ++ Is.
 
+opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1|[{kill,_}|_]=Is], Arity) ->
+    %% There could be a {move,Tmp,{x,0}} instruction after the
+    %% kill/1 instructions (moved there by opt_move_to_x0/1).
+    case splitwith(fun({kill,_}) -> true;
+                      (_) -> false
+                   end, Is) of
+        {Kills,[{move,{x,_}=Tmp,{x,0}}=M2]} ->
+            %% The two move/2 instructions (M1 and M2) can be combined
+            %% to one. The question is, though, is it safe to place
+            %% them after the kill/1 instructions?
+            case is_killed(Src, Kills, Arity) of
+                true ->
+                    %% Src (a Y register) is killed by one of the
+                    %% kill/1 instructions. Thus M1 and M2
+                    %% must be placed before the kill/1 instructions
+                    %% (essentially undoing what opt_move_to_x0/1
+                    %% did, which turned out to be a pessimization
+                    %% in this case).
+                    opt_call_moves_1([M1,M2|Kills], Arity);
+                false ->
+                    %% Src is not killed by any of the kill/1
+                    %% instructions. Thus it is safe to place
+                    %% M1 and M2 after the kill/1 instructions.
+                    opt_call_moves_1(Kills++[M1,M2], Arity)
+            end;
+        {_,_} ->
+            [M1|Is]
+    end;
 opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1,{move,Tmp,Dst}=M2|Is], Arity) ->
     case is_killed(Tmp, Is, Arity) of
         true ->
@@ -1203,6 +1389,10 @@ opt_call_moves_1([M|Ms], Arity) ->
     [M|opt_call_moves_1(Ms, Arity)];
 opt_call_moves_1([], _Arity) -> [].
 
+is_killed(Y, [{kill,Y}|_], _) ->
+    true;
+is_killed(R, [{kill,_}|Is], Arity) ->
+    is_killed(R, Is, Arity);
 is_killed(R, [{move,R,_}|_], _) ->
     false;
 is_killed(R, [{move,_,R}|_], _) ->
@@ -1210,7 +1400,9 @@ is_killed(R, [{move,_,R}|_], _) ->
 is_killed(R, [{move,_,_}|Is], Arity) ->
     is_killed(R, Is, Arity);
 is_killed({x,X}, [], Arity) ->
-    X >= Arity.
+    X >= Arity;
+is_killed({y,_}, [], _) ->
+    false.
 
 cg_alloc(#cg_alloc{stack=none,words=#need{h=0,f=0}}, _St) ->
     [];
@@ -1257,22 +1449,35 @@ cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=[#b_local{}=Func0|Args0]},
     Line = call_line(Where, local, Anno),
     Call = build_call(call, Arity, {f,FuncLbl}, Context, Dst),
     Is = setup_args(Args, Anno, Context, St) ++ Line ++ Call,
-    {Is,St};
-cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=[#b_remote{}=Func0|Args0]},
+    case Anno of
+        #{ result_type := Info } ->
+            {Is ++ [{'%', {type_info, Dst, Info}}], St};
+        #{} ->
+            {Is, St}
+    end;
+cg_call(#cg_set{anno=Anno0,op=call,dst=Dst0,args=[#b_remote{}=Func0|Args0]},
         Where, Context, St) ->
     [Dst|Args] = beam_args([Dst0|Args0], St),
     #b_remote{mod=Mod0,name=Name0,arity=Arity} = Func0,
     case {beam_arg(Mod0, St),beam_arg(Name0, St)} of
         {{atom,Mod},{atom,Name}} ->
             Func = {extfunc,Mod,Name,Arity},
-            Line = call_line(Where, Func, Anno),
+            Line = call_line(Where, Func, Anno0),
             Call = build_call(call_ext, Arity, Func, Context, Dst),
+            Anno = case erl_bifs:is_exit_bif(Mod, Name, Arity) of
+                       true ->
+                           %% There is no need to kill Y registers
+                           %% before calling an exit BIF.
+                           maps:remove(kill_yregs, Anno0);
+                       false ->
+                           Anno0
+                   end,
             Is = setup_args(Args, Anno, Context, St) ++ Line ++ Call,
             {Is,St};
         {Mod,Name} ->
             Apply = build_apply(Arity, Context, Dst),
-            Is = setup_args(Args++[Mod,Name], Anno, Context, St) ++
-                [line(Anno)] ++ Apply,
+            Is = setup_args(Args++[Mod,Name], Anno0, Context, St) ++
+                [line(Anno0)] ++ Apply,
             {Is,St}
     end;
 cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=Args0},
@@ -1325,6 +1530,12 @@ build_apply(Arity, none, Dst) ->
 cg_instr(put_map, [{atom,assoc},SrcMap|Ss], Dst, Set) ->
     Live = get_live(Set),
     [{put_map_assoc,{f,0},SrcMap,Dst,Live,{list,Ss}}];
+cg_instr(bs_get_tail, [Src], Dst, Set) ->
+    Live = get_live(Set),
+    [{bs_get_tail,Src,Dst,Live}];
+cg_instr(bs_get_position, [Ctx], Dst, Set) ->
+    Live = get_live(Set),
+    [{bs_get_position,Ctx,Dst,Live}];
 cg_instr(Op, Args, Dst, _Set) ->
     cg_instr(Op, Args, Dst).
 
@@ -1340,10 +1551,10 @@ cg_instr(bs_restore, [Ctx,Slot], _Dst) ->
 cg_instr(bs_save, [Ctx,Slot], _Dst) ->
     {integer,N} = Slot,
     [{bs_save2,Ctx,N}];
+cg_instr(bs_set_position, [Ctx,Pos], _Dst) ->
+    [{bs_set_position,Ctx,Pos}];
 cg_instr(build_stacktrace, Args, Dst) ->
     setup_args(Args) ++ [build_stacktrace|copy({x,0}, Dst)];
-cg_instr(context_to_binary, [Src], _Dst) ->
-    [{bs_context_to_binary,Src}];
 cg_instr(set_tuple_element=Op, [New,Tuple,{integer,Index}], _Dst) ->
     [{Op,New,Tuple,Index}];
 cg_instr({float,clearerror}, [], _Dst) ->
@@ -1360,6 +1571,8 @@ cg_instr(get_tuple_element=Op, [Src,{integer,N}], Dst) ->
     [{Op,Src,N,Dst}];
 cg_instr(put_list=Op, [Hd,Tl], Dst) ->
     [{Op,Hd,Tl,Dst}];
+cg_instr(put_tuple, Elements, Dst) ->
+    [{put_tuple2,Dst,{list,Elements}}];
 cg_instr(put_tuple_arity, [{integer,Arity}], Dst) ->
     [{put_tuple,Arity,Dst}];
 cg_instr(put_tuple_elements, Elements, _Dst) ->
@@ -1475,13 +1688,13 @@ cg_bs_put(Fail, [{atom,Type},{literal,Flags}|Args]) ->
             [{Op,Fail,{field_flags,Flags},Src}]
     end.
 
-cg_bs_init(Dst, Size0, Unit, Live, Fail) ->
+cg_bs_init(Dst, Size0, Alloc, Unit, Live, Fail) ->
     Op = case Unit of
              1 -> bs_init_bits;
              8 -> bs_init2
          end,
     Size = cg_bs_init_size(Size0),
-    [{Op,Fail,Size,0,Live,{field_flags,[]},Dst}].
+    [{Op,Fail,Size,Alloc,Live,{field_flags,[]},Dst}].
 
 cg_bs_init_size({x,_}=R) -> R;
 cg_bs_init_size({y,_}=R) -> R;
@@ -1517,6 +1730,14 @@ copy(Src, Dst) -> [{move,Src,Dst}].
 
 force_reg({literal,_}=Lit, Reg) ->
     {Reg,[{move,Lit,Reg}]};
+force_reg({integer,_}=Lit, Reg) ->
+    {Reg,[{move,Lit,Reg}]};
+force_reg({atom,_}=Lit, Reg) ->
+    {Reg,[{move,Lit,Reg}]};
+force_reg({float,_}=Lit, Reg) ->
+    {Reg,[{move,Lit,Reg}]};
+force_reg(nil=Lit, Reg) ->
+    {Reg,[{move,Lit,Reg}]};
 force_reg({Kind,_}=R, _) when Kind =:= x; Kind =:= y ->
     {R,[]}.
 
@@ -1600,12 +1821,41 @@ phi_copies([#b_set{dst=Dst,args=PhiArgs}|Sets], L) ->
     [#cg_set{op=copy,dst=Dst,args=CopyArgs}|phi_copies(Sets, L)];
 phi_copies([], _) -> [].
 
+%% opt_move_to_x0([Instruction]) -> [Instruction].
+%%  Simple peep-hole optimization to move a {move,Any,{x,0}} past
+%%  any kill up to the next call instruction. (To give the loader
+%%  an opportunity to combine the 'move' and the 'call' instructions.)
+
+opt_move_to_x0(Instrs) ->
+    opt_move_to_x0(Instrs, []).
+
+opt_move_to_x0([], Acc) -> reverse(Acc);
+opt_move_to_x0([{move,_,{x,0}}=Move|Is], Acc) ->
+    case move_past_kill(Is, Move, Acc) of
+        impossible -> opt_move_to_x0(Is, [Move|Acc]);
+        {Rest,NewAcc} -> opt_move_to_x0(Rest, NewAcc)
+    end;
+opt_move_to_x0([I|Is], Acc) ->
+    opt_move_to_x0(Is, [I|Acc]).
+
+%% Push the leading kill instructions onto Acc followed by Move.
+%% Gives up if one of them kills the source of Move.
+move_past_kill([{kill,Src}|_], {move,Src,_}, _) -> impossible;
+move_past_kill([{kill,_}=Kill|Is], Move, Acc) ->
+    move_past_kill(Is, Move, [Kill|Acc]);
+move_past_kill(Is, Move, Acc) -> {Is,[Move|Acc]}.
+
 %% setup_args(Args, Anno, Context) -> [Instruction].
 %% setup_args(Args) -> [Instruction].
 %%  Set up X registers for a call.
 
 setup_args(Args, Anno, none, St) ->
-    setup_args(Args) ++ kill_yregs(Anno, St);
+    case {setup_args(Args),kill_yregs(Anno, St)} of
+        {Moves,[]} ->
+            Moves;
+        {Moves,Kills} ->
+            opt_move_to_x0(Moves ++ Kills)
+    end;
 setup_args(Args, _, _, _) ->
     setup_args(Args).
 
@@ -1694,7 +1944,7 @@ get_register(V, Regs) ->
 beam_args(As, St) ->
     [beam_arg(A, St) || A <- As].
 
-beam_arg(#b_var{name=Name}, #cg{regs=Regs}) ->
+beam_arg(#b_var{}=Name, #cg{regs=Regs}) ->
     maps:get(Name, Regs);
 beam_arg(#b_literal{val=Val}, _) ->
     if