aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler/src/beam_ssa_pre_codegen.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compiler/src/beam_ssa_pre_codegen.erl')
-rw-r--r--lib/compiler/src/beam_ssa_pre_codegen.erl1084
1 files changed, 739 insertions, 345 deletions
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index bbc3739eb5..bad43a9c4e 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -23,11 +23,10 @@
%% it has been annotated and transformed to help the code generator.
%%
%% * Some instructions are translated to other instructions closer to
-%% the BEAM instructions. For example, the put_tuple instruction is
-%% broken apart into the put_tuple_arity and put_tuple_elements
-%% instructions. Similary, the binary matching instructions are
-%% transformed from the optimization-friendly internal format to
-%% instruction more similar to the actual BEAM instructions.
+%% the BEAM instructions. For example, the binary matching
+%% instructions are transformed from the optimization-friendly
+%% internal format to instruction more similar to the actual BEAM
+%% instructions.
%%
%% * Blocks that will need an instruction for allocating a stack frame
%% are annotated with a {frame_size,Size} annotation.
@@ -73,20 +72,20 @@
-import(lists, [all/2,any/2,append/1,duplicate/2,
foldl/3,last/1,map/2,member/2,partition/2,
- reverse/1,reverse/2,sort/1,zip/2]).
+ reverse/1,reverse/2,sort/1,splitwith/2,zip/2]).
-spec module(beam_ssa:b_module(), [compile:option()]) ->
{'ok',beam_ssa:b_module()}.
module(#b_module{body=Fs0}=Module, Opts) ->
- ExtraAnnos = proplists:get_bool(dprecg, Opts),
- Ps = passes(ExtraAnnos),
- Fs = functions(Fs0, Ps),
+ UseBSM3 = not proplists:get_bool(no_bsm3, Opts),
+ Ps = passes(Opts),
+ Fs = functions(Fs0, Ps, UseBSM3),
{ok,Module#b_module{body=Fs}}.
-functions([F|Fs], Ps) ->
- [function(F, Ps)|functions(Fs, Ps)];
-functions([], _Ps) -> [].
+functions([F|Fs], Ps, UseBSM3) ->
+ [function(F, Ps, UseBSM3)|functions(Fs, Ps, UseBSM3)];
+functions([], _Ps, _UseBSM3) -> [].
-type b_var() :: beam_ssa:b_var().
-type var_name() :: beam_ssa:var_name().
@@ -104,22 +103,28 @@ functions([], _Ps) -> [].
-record(st, {ssa :: beam_ssa:block_map(),
args :: [b_var()],
cnt :: beam_ssa:label(),
+ use_bsm3 :: boolean(),
frames=[] :: [beam_ssa:label()],
intervals=[] :: [{b_var(),[range()]}],
- aliases=[] :: [{b_var(),b_var()}],
res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
regs=#{} :: #{b_var():=ssa_register()},
extra_annos=[] :: [{atom(),term()}]
}).
-define(PASS(N), {N,fun N/1}).
-passes(ExtraAnnos) ->
+passes(Opts) ->
+ AddPrecgAnnos = proplists:get_bool(dprecg, Opts),
+ FixTuples = proplists:get_bool(no_put_tuple2, Opts),
Ps = [?PASS(assert_no_critical_edges),
%% Preliminaries.
?PASS(fix_bs),
?PASS(sanitize),
- ?PASS(fix_tuples),
+ case FixTuples of
+ false -> ignore;
+ true -> ?PASS(fix_tuples)
+ end,
+ ?PASS(use_set_tuple_element),
?PASS(place_frames),
?PASS(fix_receives),
@@ -127,6 +132,10 @@ passes(ExtraAnnos) ->
?PASS(find_yregs),
?PASS(reserve_yregs),
+ %% Handle legacy binary match instruction that don't
+ %% accept a Y register as destination.
+ ?PASS(legacy_bs),
+
%% Improve reuse of Y registers to potentially
%% reduce the size of the stack frame.
?PASS(copy_retval),
@@ -135,30 +144,25 @@ passes(ExtraAnnos) ->
%% Calculate live intervals.
?PASS(number_instructions),
?PASS(live_intervals),
- ?PASS(remove_unsuitable_aliases),
?PASS(reserve_regs),
- ?PASS(merge_intervals),
%% If needed for a .precg file, save the live intervals
%% so they can be included in an annotation.
- case ExtraAnnos of
+ case AddPrecgAnnos of
false -> ignore;
true -> ?PASS(save_live_intervals)
end,
%% Allocate registers.
?PASS(linear_scan),
- ?PASS(fix_aliased_regs),
?PASS(frame_size),
?PASS(turn_yregs)],
- case ExtraAnnos of
- false -> [P || P <- Ps, P =/= ignore];
- true -> Ps
- end.
+ [P || P <- Ps, P =/= ignore].
-function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0, Ps) ->
+function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
+ Ps, UseBSM3) ->
try
- St0 = #st{ssa=Blocks0,args=Args,cnt=Count0},
+ St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,cnt=Count0},
St = compile:run_sub_passes(Ps, St0),
#st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St,
F1 = add_extra_annos(F0, ExtraAnnos),
@@ -174,14 +178,6 @@ function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0, Ps) ->
save_live_intervals(#st{intervals=Intervals}=St) ->
St#st{extra_annos=[{live_intervals,Intervals}]}.
-fix_aliased_regs(#st{aliases=Aliases,regs=Regs}=St) ->
- St#st{regs=fix_aliased_regs(Aliases, Regs)}.
-
-fix_aliased_regs([{Alias,V}|Aliases], Regs) ->
- #{V:=Reg} = Regs,
- fix_aliased_regs(Aliases, Regs#{Alias=>Reg});
-fix_aliased_regs([], Regs) -> Regs.
-
%% Add extra annotations when a .precg listing file is being produced.
add_extra_annos(F, Annos) ->
foldl(fun({Name,Value}, Acc) ->
@@ -214,7 +210,7 @@ assert_no_ces(_, _, Blocks) -> Blocks.
%% * Combine bs_match and bs_extract instructions to bs_get
%% instructions.
-fix_bs(#st{ssa=Blocks,cnt=Count0}=St) ->
+fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) ->
F = fun(#b_set{op=bs_start_match,dst=Dst}, A) ->
%% Mark the root of the match context list.
[{Dst,{context,Dst}}|A];
@@ -232,8 +228,13 @@ fix_bs(#st{ssa=Blocks,cnt=Count0}=St) ->
CtxChain = maps:from_list(M),
Linear0 = beam_ssa:linearize(Blocks),
- %% Insert bs_save / bs_restore instructions where needed.
- {Linear1,Count} = bs_save_restore(Linear0, CtxChain, Count0),
+ %% Insert position instructions where needed.
+ {Linear1,Count} = case UseBSM3 of
+ true ->
+ bs_pos_bsm3(Linear0, CtxChain, Count0);
+ false ->
+ bs_pos_bsm2(Linear0, CtxChain, Count0)
+ end,
%% Rename instructions.
Linear = bs_instrs(Linear1, CtxChain, []),
@@ -241,10 +242,54 @@ fix_bs(#st{ssa=Blocks,cnt=Count0}=St) ->
St#st{ssa=maps:from_list(Linear),cnt=Count}
end.
+%% Insert bs_get_position and bs_set_position instructions as needed.
+bs_pos_bsm3(Linear0, CtxChain, Count0) ->
+ Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
+ Rs = maps:values(Rs0),
+ S0 = sofs:relation(Rs, [{context,save_point}]),
+ S1 = sofs:relation_to_family(S0),
+ S = sofs:to_external(S1),
+
+ {SavePoints,Count1} = make_bs_pos_dict(S, Count0, []),
+ {Gets,Count2} = make_bs_setpos_map(Rs, SavePoints, Count1, []),
+ {Sets,Count} = make_bs_getpos_map(maps:to_list(Rs0), SavePoints, Count2, []),
+
+ %% Now insert all saves and restores.
+ {bs_insert_bsm3(Linear0, Gets, Sets, SavePoints),Count}.
+
+make_bs_setpos_map([{Ctx,Save}=Ps|T], SavePoints, Count, Acc) ->
+ SavePoint = get_savepoint(Ps, SavePoints),
+ I = #b_set{op=bs_get_position,dst=SavePoint,args=[Ctx]},
+ make_bs_setpos_map(T, SavePoints, Count+1, [{Save,I}|Acc]);
+make_bs_setpos_map([], _, Count, Acc) ->
+ {maps:from_list(Acc),Count}.
+
+make_bs_getpos_map([{Bef,{Ctx,_}=Ps}|T], SavePoints, Count, Acc) ->
+ Ignored = #b_var{name={'@ssa_ignored',Count}},
+ Args = [Ctx, get_savepoint(Ps, SavePoints)],
+ I = #b_set{op=bs_set_position,dst=Ignored,args=Args},
+ make_bs_getpos_map(T, SavePoints, Count+1, [{Bef,I}|Acc]);
+make_bs_getpos_map([], _, Count, Acc) ->
+ {maps:from_list(Acc),Count}.
+
+get_savepoint({_,_}=Ps, SavePoints) ->
+ Name = {'@ssa_bs_position', map_get(Ps, SavePoints)},
+ #b_var{name=Name}.
-%% Insert bs_save and bs_restore instructions as needed.
+make_bs_pos_dict([{Ctx,Pts}|T], Count0, Acc0) ->
+ {Acc, Count} = make_bs_pos_dict_1(Pts, Ctx, Count0, Acc0),
+ make_bs_pos_dict(T, Count, Acc);
+make_bs_pos_dict([], Count, Acc) ->
+ {maps:from_list(Acc), Count}.
-bs_save_restore(Linear0, CtxChain, Count0) ->
+make_bs_pos_dict_1([H|T], Ctx, I, Acc) ->
+ make_bs_pos_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
+make_bs_pos_dict_1([], Ctx, I, Acc) ->
+ {[{Ctx,I}|Acc], I}.
+
+%% As bs_position but without OTP-22 instructions. This is only used when
+%% cross-compiling to older versions.
+bs_pos_bsm2(Linear0, CtxChain, Count0) ->
Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
Rs = maps:values(Rs0),
S0 = sofs:relation(Rs, [{context,save_point}]),
@@ -255,7 +300,7 @@ bs_save_restore(Linear0, CtxChain, Count0) ->
{Restores,Count} = make_restore_map(maps:to_list(Rs0), Slots, Count1, []),
%% Now insert all saves and restores.
- {bs_insert(Linear0, Saves, Restores, Slots),Count}.
+ {bs_insert_bsm2(Linear0, Saves, Restores, Slots),Count}.
make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) ->
Ignored = #b_var{name={'@ssa_ignored',Count}},
@@ -279,7 +324,7 @@ make_restore_map([], _, Count, Acc) ->
make_slot({Same,Same}, _Slots) ->
#b_literal{val=start};
make_slot({_,_}=Ps, Slots) ->
- #b_literal{val=maps:get(Ps, Slots)}.
+ #b_literal{val=map_get(Ps, Slots)}.
make_save_point_dict([{Ctx,Pts}|T], Acc0) ->
Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0),
@@ -308,8 +353,7 @@ bs_restores([], _, _, Rs) -> Rs.
bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) ->
join_positions([{Succ,SPos},{Fail,FPos}], D);
-bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, FPos, D) ->
- SPos = FPos, %Assertion.
+bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, _FPos, D) ->
Update = [{L,SPos} || {_,L} <- List] ++ [{Fail,SPos}],
join_positions(Update, D);
bs_update_successors(#b_ret{}, _, _, D) -> D.
@@ -388,10 +432,19 @@ bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is],
Start = bs_subst_ctx(FromPos, CtxChain),
#{Start:=FromPos} = PosMap, %Assertion.
bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=call,dst=Dst,args=Args}|Is],
+ CtxChain, PosMap0, Rs0) ->
+ {Rs,PosMap1} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
+ PosMap = bs_invalidate_pos(Args, PosMap1, CtxChain),
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=landingpad}|Is], CtxChain, PosMap0, Rs) ->
+ %% We can land here from any point, so all positions are invalid.
+ PosMap = maps:map(fun(_Start,_Pos) -> unknown end, PosMap0),
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is],
CtxChain, PosMap0, Rs0)
when Op =:= bs_test_tail;
- Op =:= call ->
+ Op =:= bs_get_tail ->
{Rs,PosMap} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([_|Is], CtxChain, PosMap, Rs) ->
@@ -399,7 +452,6 @@ bs_restores_is([_|Is], CtxChain, PosMap, Rs) ->
bs_restores_is([], _CtxChain, PosMap, Rs) ->
{PosMap,Rs}.
-
bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
#b_literal{val=binary},_Flags,
#b_literal{val=all},#b_literal{val=U}]}) ->
@@ -410,6 +462,23 @@ bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
bs_match_type(_) ->
plain.
+%% Call instructions leave the match position in an undefined state,
+%% requiring us to invalidate each affected argument.
+bs_invalidate_pos([#b_var{}=Arg|Args], PosMap0, CtxChain) ->
+ Start = bs_subst_ctx(Arg, CtxChain),
+ case PosMap0 of
+ #{Start:=_} ->
+ PosMap = PosMap0#{Start:=unknown},
+ bs_invalidate_pos(Args, PosMap, CtxChain);
+ #{} ->
+ %% Not a match context.
+ bs_invalidate_pos(Args, PosMap0, CtxChain)
+ end;
+bs_invalidate_pos([_|Args], PosMap, CtxChain) ->
+ bs_invalidate_pos(Args, PosMap, CtxChain);
+bs_invalidate_pos([], PosMap, _CtxChain) ->
+ PosMap.
+
bs_restore_args([#b_var{}=Arg|Args], PosMap0, CtxChain, Dst, Rs0) ->
Start = bs_subst_ctx(Arg, CtxChain),
case PosMap0 of
@@ -432,33 +501,45 @@ bs_restore_args([], PosMap, _CtxChain, _Dst, Rs) ->
%% Insert all bs_save and bs_restore instructions.
-bs_insert([{L,#b_blk{is=Is0}=Blk}|Bs0], Saves, Restores, Slots) ->
- Is = bs_insert_is_1(Is0, Restores, Slots),
+bs_insert_bsm3(Blocks, Saves, Restores, SavePoints) ->
+ bs_insert_1(Blocks, Saves, Restores, SavePoints, fun(I) -> I end).
+
+bs_insert_bsm2(Blocks, Saves, Restores, SavePoints) ->
+ %% The old instructions require bs_start_match to be annotated with the
+ %% number of position slots it needs.
+ bs_insert_1(Blocks, Saves, Restores, SavePoints,
+ fun(#b_set{op=bs_start_match,dst=Dst}=I0) ->
+ NumSlots = case SavePoints of
+ #{Dst:=NumSlots0} -> NumSlots0;
+ #{} -> 0
+ end,
+ beam_ssa:add_anno(num_slots, NumSlots, I0);
+ (I) ->
+ I
+ end).
+
+bs_insert_1([{L,#b_blk{is=Is0}=Blk}|Bs0], Saves, Restores, Slots, XFrm) ->
+ Is = bs_insert_is_1(Is0, Restores, Slots, XFrm),
Bs = bs_insert_saves(Is, Bs0, Saves),
- [{L,Blk#b_blk{is=Is}}|bs_insert(Bs, Saves, Restores, Slots)];
-bs_insert([], _, _, _) -> [].
+ [{L,Blk#b_blk{is=Is}}|bs_insert_1(Bs, Saves, Restores, Slots, XFrm)];
+bs_insert_1([], _, _, _, _) -> [].
-bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is], Restores, Slots) ->
+bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is], Restores, SavePoints, XFrm) ->
+ I = XFrm(I0),
if
Op =:= bs_test_tail;
+ Op =:= bs_get_tail;
Op =:= bs_match;
Op =:= call ->
Rs = case Restores of
#{Dst:=R} -> [R];
#{} -> []
end,
- Rs ++ [I0|bs_insert_is_1(Is, Restores, Slots)];
- Op =:= bs_start_match ->
- NumSlots = case Slots of
- #{Dst:=NumSlots0} -> NumSlots0;
- #{} -> 0
- end,
- I = beam_ssa:add_anno(num_slots, NumSlots, I0),
- [I|bs_insert_is_1(Is, Restores, Slots)];
+ Rs ++ [I|bs_insert_is_1(Is, Restores, SavePoints, XFrm)];
true ->
- [I0|bs_insert_is_1(Is, Restores, Slots)]
+ [I|bs_insert_is_1(Is, Restores, SavePoints, XFrm)]
end;
-bs_insert_is_1([], _, _) -> [].
+bs_insert_is_1([], _, _, _) -> [].
bs_insert_saves([#b_set{dst=Dst}|Is], Bs, Saves) ->
case Saves of
@@ -504,6 +585,8 @@ bs_instrs_is([#b_set{op=Op,args=Args0}=I0|Is], CtxChain, Acc) ->
I1#b_set{op=bs_skip,args=[Type,Ctx|As]};
{bs_match,[#b_literal{val=string},Ctx|As]} ->
I1#b_set{op=bs_match_string,args=[Ctx|As]};
+ {bs_get_tail,[Ctx|As]} ->
+ I1#b_set{op=bs_get_tail,args=[Ctx|As]};
{_,_} ->
I1
end,
@@ -534,6 +617,59 @@ bs_subst_ctx(#b_var{}=Var, CtxChain) ->
bs_subst_ctx(Other, _CtxChain) ->
Other.
+%% legacy_bs(St0) -> St.
+%% Binary matching instructions in OTP 21 and earlier don't support
+%% a Y register as destination. If St#st.use_bsm3 is false,
+%% we will need to rewrite those instructions so that the result
+%% is first put in an X register and then moved to a Y register
+%% if the operation succeeded.
+
+legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) ->
+ IsYreg = maps:from_list([{V,true} || {V,{y,_}} <- Res]),
+ Linear0 = beam_ssa:linearize(Blocks0),
+ {Linear,Count} = legacy_bs(Linear0, IsYreg, Count0, #{}, []),
+ Blocks = maps:from_list(Linear),
+ St#st{ssa=Blocks,cnt=Count};
+legacy_bs(#st{use_bsm3=true}=St) -> St.
+
+legacy_bs([{L,Blk}|Bs], IsYreg, Count0, Copies0, Acc) ->
+ #b_blk{is=Is0,last=Last} = Blk,
+ Is1 = case Copies0 of
+ #{L:=Copy} -> [Copy|Is0];
+ #{} -> Is0
+ end,
+ {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []),
+ legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]);
+legacy_bs([], _IsYreg, Count, _Copies, Acc) ->
+ {Acc,Count}.
+
+legacy_bs_is([#b_set{op=Op,dst=Dst}=I0,
+ #b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0],
+ Last, IsYreg, Count0, Copies0, Acc) ->
+ NeedsFix = is_map_key(Dst, IsYreg) andalso
+ case Op of
+ bs_get -> true;
+ bs_init -> true;
+ _ -> false
+ end,
+ case NeedsFix of
+ true ->
+ TempDst = #b_var{name={'@bs_temp_dst',Count0}},
+ Count = Count0 + 1,
+ I = I0#b_set{dst=TempDst},
+ SuccI = SuccI0#b_set{args=[TempDst]},
+ Copy = #b_set{op=copy,dst=Dst,args=[TempDst]},
+ #b_br{bool=SuccDst,succ=SuccL} = Last,
+ Copies = Copies0#{SuccL=>Copy},
+ legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]);
+ false ->
+ legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc])
+ end;
+legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
+ legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]);
+legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
+ {reverse(Acc),Count,Copies}.
+
%% sanitize(St0) -> St.
%% Remove constructs that can cause problems later:
%%
@@ -549,7 +685,7 @@ sanitize(#st{ssa=Blocks0,cnt=Count0}=St) ->
St#st{ssa=Blocks,cnt=Count}.
sanitize([L|Ls], Count0, Blocks0, Values0) ->
- #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks0),
+ #b_blk{is=Is0} = Blk0 = map_get(L, Blocks0),
case sanitize_is(Is0, Count0, Values0, false, []) of
no_change ->
sanitize(Ls, Count0, Blocks0, Values0);
@@ -574,23 +710,24 @@ sanitize([], Count, Blocks0, Values) ->
false -> remove_unreachable(Ls, Blocks, Reachable, [])
end,Count}.
-sanitize_is([#b_set{op=get_map_element,
- args=[#b_literal{}=Map,Key]}=I0|Is],
- Count0, Values, _Changed, Acc) ->
- {MapVarName,Count} = new_var_name('@ssa_map', Count0),
- MapVar = #b_var{name=MapVarName},
- I = I0#b_set{args=[MapVar,Key]},
- Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
- sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
-sanitize_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args0}=I0|Is0],
- Count, Values, Changed, Acc) ->
- Args = map(fun(#b_var{name=V}=Var) ->
- case Values of
- #{V:=New} -> New;
- #{} -> Var
- end;
- (Lit) -> Lit
- end, Args0),
+sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
+ Count0, Values, Changed, Acc) ->
+ case sanitize_args(Args0, Values) of
+ [#b_literal{}=Map,Key] ->
+ %% Bind the literal map to a variable.
+ {MapVar,Count} = new_var('@ssa_map', Count0),
+ I = I0#b_set{args=[MapVar,Key]},
+ Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
+ sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
+ [_,_]=Args0 ->
+ sanitize_is(Is, Count0, Values, Changed, [I0|Acc]);
+ [_,_]=Args ->
+ I = I0#b_set{args=Args},
+ sanitize_is(Is, Count0, Values, Changed, [I|Acc])
+ end;
+sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0],
+ Count, Values, Changed0, Acc) ->
+ Args = sanitize_args(Args0, Values),
case sanitize_instr(Op, Args, I0) of
{value,Value0} ->
Value = #b_literal{val=Value0},
@@ -598,7 +735,9 @@ sanitize_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args0}=I0|Is0],
{ok,I} ->
sanitize_is(Is0, Count, Values, true, [I|Acc]);
ok ->
- sanitize_is(Is0, Count, Values, Changed, [I0|Acc])
+ I = I0#b_set{args=Args},
+ Changed = Changed0 orelse Args =/= Args0,
+ sanitize_is(Is0, Count, Values, Changed, [I|Acc])
end;
sanitize_is([], Count, Values, Changed, Acc) ->
case Changed of
@@ -608,6 +747,14 @@ sanitize_is([], Count, Values, Changed, Acc) ->
no_change
end.
+sanitize_args(Args, Values) ->
+ map(fun(Var) ->
+ case Values of
+ #{Var:=New} -> New;
+ #{} -> Var
+ end
+ end, Args).
+
sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
case erl_bifs:is_pure(erlang, Bif, 1) of
false ->
@@ -671,7 +818,7 @@ sanitize_badarg(I) ->
I#b_set{op=call,args=[Func,#b_literal{val=badarg}]}.
remove_unreachable([L|Ls], Blocks, Reachable, Acc) ->
- #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks),
+ #b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
case split_phis(Is0) of
{[_|_]=Phis,Rest} ->
Is = [prune_phi(Phi, Reachable) || Phi <- Phis] ++ Rest,
@@ -693,15 +840,16 @@ prune_phi(#b_set{args=Args0}=Phi, Reachable) ->
%%%
%% fix_tuples(St0) -> St.
-%% We must split tuple creation into two instruction to mirror the
-%% the way tuples are created in BEAM. Each put_tuple instruction is
-%% split into put_tuple_arity followed by put_tuple_elements.
+%% If compatibility with a previous version of Erlang has been
+%% requested, tuple creation must be split into two instruction to
+%% mirror the the way tuples are created in BEAM prior to OTP 22.
+%% Each put_tuple instruction is split into put_tuple_arity followed
+%% by put_tuple_elements.
fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
F = fun (#b_set{op=put_tuple,args=Args}=Put, C0) ->
Arity = #b_literal{val=length(Args)},
- {VarName,C} = new_var_name('@ssa_ignore', C0),
- Ignore = #b_var{name=VarName},
+ {Ignore,C} = new_var('@ssa_ignore', C0),
{[Put#b_set{op=put_tuple_arity,args=[Arity]},
#b_set{dst=Ignore,op=put_tuple_elements,args=Args}],C};
(I, C) -> {[I],C}
@@ -710,6 +858,202 @@ fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
St#st{ssa=Blocks,cnt=Count}.
%%%
+%%% Introduce the set_tuple_element instructions to make
+%%% multiple-field record updates faster.
+%%%
+%%% The expansion of record field updates, when more than one field is
+%%% updated, but not a majority of the fields, will create a sequence of
+%%% calls to `erlang:setelement(Index, Value, Tuple)` where Tuple in the
+%%% first call is the original record tuple, and in the subsequent calls
+%%% Tuple is the result of the previous call. Furthermore, all Index
+%%% values are constant positive integers, and the first call to
+%%% `setelement` will have the greatest index. Thus all the following
+%%% calls do not actually need to test at run-time whether Tuple has type
+%%% tuple, nor that the index is within the tuple bounds.
+%%%
+%%% Since this optimization introduces destructive updates, it used to
+%%% be done as the very last Core Erlang pass before going to
+%%% lower-level code. However, it turns out that this kind of destructive
+%%% updates are awkward also in SSA code and can prevent or complicate
+%%% type analysis and aggressive optimizations.
+%%%
+%%% NOTE: Because there no write barriers in the system, this kind of
+%%% optimization can only be done when we are sure that garbage
+%%% collection will not be triggered between the creation of the tuple
+%%% and the destructive updates - otherwise we might insert pointers
+%%% from an older generation to a newer.
+%%%
+
+use_set_tuple_element(#st{ssa=Blocks0}=St) ->
+ Uses = count_uses(Blocks0),
+ RPO = reverse(beam_ssa:rpo(Blocks0)),
+ Blocks = use_ste_1(RPO, Uses, Blocks0),
+ St#st{ssa=Blocks}.
+
+use_ste_1([L|Ls], Uses, Blocks0) ->
+ {Blk0,Blocks} = use_ste_across(L, Uses, Blocks0),
+ #b_blk{is=Is0} = Blk0,
+ case use_ste_is(Is0, Uses) of
+ Is0 ->
+ use_ste_1(Ls, Uses, Blocks);
+ Is ->
+ Blk = Blk0#b_blk{is=Is},
+ use_ste_1(Ls, Uses, Blocks#{L:=Blk})
+ end;
+use_ste_1([], _, Blocks) -> Blocks.
+
+%%% Optimize within a single block.
+
+use_ste_is([#b_set{}=I|Is0], Uses) ->
+ Is = use_ste_is(Is0, Uses),
+ case extract_ste(I) of
+ none ->
+ [I|Is];
+ Extracted ->
+ use_ste_call(Extracted, I, Is, Uses)
+ end;
+use_ste_is([], _Uses) -> [].
+
+use_ste_call({Dst0,Pos0,_Var0,_Val0}, Call1, Is0, Uses) ->
+ case get_ste_call(Is0, []) of
+ {Prefix,{Dst1,Pos1,Dst0,Val1},Call2,Is}
+ when Pos1 > 0, Pos0 > Pos1 ->
+ case is_single_use(Dst0, Uses) of
+ true ->
+ Call = Call1#b_set{dst=Dst1},
+ Args = [Val1,Dst1,#b_literal{val=Pos1-1}],
+ Dsetel = Call2#b_set{op=set_tuple_element,
+ dst=Dst0,
+ args=Args},
+ [Call|Prefix] ++ [Dsetel|Is];
+ false ->
+ [Call1|Is0]
+ end;
+ _ ->
+ [Call1|Is0]
+ end.
+
+get_ste_call([#b_set{op=get_tuple_element}=I|Is], Acc) ->
+ get_ste_call(Is, [I|Acc]);
+get_ste_call([#b_set{op=call}=I|Is], Acc) ->
+ case extract_ste(I) of
+ none ->
+ none;
+ Extracted ->
+ {reverse(Acc),Extracted,I,Is}
+ end;
+get_ste_call(_, _) -> none.
+
+extract_ste(#b_set{op=call,dst=Dst,
+ args=[#b_remote{mod=#b_literal{val=M},
+ name=#b_literal{val=F}}|Args]}) ->
+ case {M,F,Args} of
+ {erlang,setelement,[#b_literal{val=Pos},Tuple,Val]} ->
+ {Dst,Pos,Tuple,Val};
+ {_,_,_} ->
+ none
+ end;
+extract_ste(#b_set{}) -> none.
+
+%%% Optimize accross blocks within a try/catch block.
+
+use_ste_across(L, Uses, Blocks) ->
+ case map_get(L, Blocks) of
+ #b_blk{last=#b_br{bool=#b_var{}}}=Blk ->
+ try
+ use_ste_across_1(L, Blk, Uses, Blocks)
+ catch
+ throw:not_possible ->
+ {Blk,Blocks}
+ end;
+ #b_blk{}=Blk ->
+ {Blk,Blocks}
+ end.
+
+use_ste_across_1(L, Blk0, Uses, Blocks0) ->
+ #b_blk{is=IsThis,last=#b_br{bool=Bool,succ=Next}} = Blk0,
+ case reverse(IsThis) of
+ [#b_set{op=succeeded,dst=Bool,args=[Result]}=Succ0,
+ #b_set{op=call,args=[#b_remote{}|_],dst=Result}=Call1|Prefix] ->
+ case is_single_use(Bool, Uses) andalso
+ is_n_uses(2, Result, Uses) of
+ true -> ok;
+ false -> throw(not_possible)
+ end,
+ Call2 = use_ste_across_next(Next, Uses, Blocks0),
+ Is = [Call1,Call2],
+ case use_ste_is(Is, decrement_uses(Result, Uses)) of
+ [#b_set{}=Call,#b_set{op=set_tuple_element}=Ste] ->
+ Blocks1 = use_ste_fix_next(Ste, Next, Blocks0),
+ Succ = Succ0#b_set{args=[Call#b_set.dst]},
+ Blk = Blk0#b_blk{is=reverse(Prefix, [Call,Succ])},
+ Blocks = Blocks1#{L:=Blk},
+ {Blk,Blocks};
+ _ ->
+ throw(not_possible)
+ end;
+ _ ->
+ throw(not_possible)
+ end.
+
+use_ste_across_next(Next, Uses, Blocks) ->
+ case map_get(Next, Blocks) of
+ #b_blk{is=[#b_set{op=call,dst=Result,args=[#b_remote{}|_]}=Call,
+ #b_set{op=succeeded,dst=Bool,args=[Result]}],
+ last=#b_br{bool=Bool}} ->
+ case is_single_use(Bool, Uses) andalso
+ is_n_uses(2, Result, Uses) of
+ true -> ok;
+ false -> throw(not_possible)
+ end,
+ Call;
+ #b_blk{} ->
+ throw(not_possible)
+ end.
+
+use_ste_fix_next(Ste, Next, Blocks) ->
+ Blk0 = map_get(Next, Blocks),
+ #b_blk{is=[#b_set{op=call},#b_set{op=succeeded}],last=Br0} = Blk0,
+ Br = beam_ssa:normalize(Br0#b_br{bool=#b_literal{val=true}}),
+ Blk = Blk0#b_blk{is=[Ste],last=Br},
+ Blocks#{Next:=Blk}.
+
+%% Count how many times each variable is used.
+
+count_uses(Blocks) ->
+ count_uses_blk(maps:values(Blocks), #{}).
+
+count_uses_blk([#b_blk{is=Is,last=Last}|Bs], CountMap0) ->
+ F = fun(I, CountMap) ->
+ foldl(fun(Var, Acc) ->
+ case Acc of
+ #{Var:=3} -> Acc;
+ #{Var:=C} -> Acc#{Var:=C+1};
+ #{} -> Acc#{Var=>1}
+ end
+ end, CountMap, beam_ssa:used(I))
+ end,
+ CountMap = F(Last, foldl(F, CountMap0, Is)),
+ count_uses_blk(Bs, CountMap);
+count_uses_blk([], CountMap) -> CountMap.
+
+decrement_uses(V, Uses) ->
+ #{V:=C} = Uses,
+ Uses#{V:=C-1}.
+
+is_n_uses(N, V, Uses) ->
+ case Uses of
+ #{V:=N} -> true;
+ #{} -> false
+ end.
+
+is_single_use(V, Uses) ->
+ case Uses of
+ #{V:=1} -> true;
+ #{} -> false
+ end.
+
+%%%
%%% Find out where frames should be placed.
%%%
@@ -727,7 +1071,7 @@ fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
%% a stack frame or set up a stack frame with a different size.
place_frames(#st{ssa=Blocks}=St) ->
- Doms = beam_ssa:dominators(Blocks),
+ {Doms,_} = beam_ssa:dominators(Blocks),
Ls = beam_ssa:rpo(Blocks),
Tried = gb_sets:empty(),
Frames0 = [],
@@ -735,7 +1079,7 @@ place_frames(#st{ssa=Blocks}=St) ->
St#st{frames=Frames}.
place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) ->
- Blk = maps:get(L, Blocks),
+ Blk = map_get(L, Blocks),
case need_frame(Blk) of
true ->
%% This block needs a frame. Try to place it here.
@@ -846,15 +1190,15 @@ place_frame_here(L, Blocks, Doms, Frames) ->
%% Return all predecessors referenced in phi nodes.
phi_predecessors(L, Blocks) ->
- #b_blk{is=Is} = maps:get(L, Blocks),
+ #b_blk{is=Is} = map_get(L, Blocks),
[P || #b_set{op=phi,args=Args} <- Is, {_,P} <- Args].
%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false.
%% Test whether block Label is dominated by block DominatedBy.
is_dominated_by(L, DomBy, Doms) ->
- DominatedBy = maps:get(L, Doms),
- ordsets:is_element(DomBy, DominatedBy).
+ DominatedBy = map_get(L, Doms),
+ member(DomBy, DominatedBy).
%% need_frame(#b_blk{}) -> true|false.
%% Test whether any of the instructions in the block requires a stack frame.
@@ -864,12 +1208,12 @@ need_frame(#b_blk{is=Is,last=#b_ret{arg=Ret}}) ->
need_frame(#b_blk{is=Is}) ->
need_frame_1(Is, body).
-need_frame_1([#b_set{op=make_fun,dst=#b_var{name=Fun}}|Is], {return,_}=Context) ->
+need_frame_1([#b_set{op=make_fun,dst=Fun}|Is], {return,_}=Context) ->
%% Since make_fun clobbers X registers, a stack frame is needed if
%% any of the following instructions use any other variable than
%% the one holding the reference to the created fun.
need_frame_1(Is, Context) orelse
- case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=#b_var{name=Fun}}}) of
+ case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=Fun}}) of
[Fun] -> false;
[_|_] -> true
end;
@@ -884,7 +1228,7 @@ need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
case Func of
#b_remote{mod=#b_literal{val=Mod},
name=#b_literal{val=Name},
- arity=Arity} ->
+ arity=Arity} when is_atom(Mod), is_atom(Name) ->
case erl_bifs:is_exit_bif(Mod, Name, Arity) of
true ->
false;
@@ -896,11 +1240,11 @@ need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
#b_remote{} ->
%% This is an apply(), which always needs a frame.
true;
- #b_var{} ->
- %% A fun call always needs a frame.
- true;
+ #b_local{} ->
+ Context =:= body orelse Is =/= [];
_ ->
- Context =:= body orelse Is =/= []
+ %% A fun call always needs a frame.
+ true
end;
need_frame_1([I|Is], Context) ->
beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
@@ -932,11 +1276,11 @@ is_trap_bif(_, _, _) -> false.
%%% used during matching.
%%%
%%% Depending on where variables are defined and used, they must
-%%% be handling in two different ways.
+%%% be handled in two different ways.
%%%
%%% Variables that are always defined in the receive (before branching
%%% out into the different clauses of the receive) and used after the
-%%% receive, must be handled in the following way: Before each
+%%% receive must be handled in the following way: Before each
%%% remove_message instruction, each such variable must be copied, and
%%% all variables must be consolidated using a phi node in the
%%% common exit block for the receive.
@@ -984,15 +1328,13 @@ recv_common(Defs, Exit, Blocks) ->
%% in the exit block following the receive.
recv_fix_common([Msg0|T], Exit, Rm, Blocks0, Count0) ->
- {Msg1,Count1} = new_var_name('@recv', Count0),
- Msg = #b_var{name=Msg1},
+ {Msg,Count1} = new_var('@recv', Count0),
Blocks1 = beam_ssa:rename_vars(#{Msg0=>Msg}, [Exit], Blocks0),
N = length(Rm),
- {MsgVars0,Count} = new_var_names(duplicate(N, '@recv'), Count1),
- MsgVars = [#b_var{name=V} || V <- MsgVars0],
+ {MsgVars,Count} = new_vars(duplicate(N, '@recv'), Count1),
PhiArgs = fix_exit_phi_args(MsgVars, Rm, Exit, Blocks1),
Phi = #b_set{op=phi,dst=Msg,args=PhiArgs},
- ExitBlk0 = maps:get(Exit, Blocks1),
+ ExitBlk0 = map_get(Exit, Blocks1),
ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitBlk0#b_blk.is]},
Blocks2 = Blocks1#{Exit:=ExitBlk},
Blocks = recv_fix_common_1(MsgVars, Rm, Msg0, Blocks2),
@@ -1003,8 +1345,8 @@ recv_fix_common([], _, _, Blocks, Count) ->
recv_fix_common_1([V|Vs], [Rm|Rms], Msg, Blocks0) ->
Ren = #{Msg=>V},
Blocks1 = beam_ssa:rename_vars(Ren, [Rm], Blocks0),
- #b_blk{is=Is0} = Blk0 = maps:get(Rm, Blocks1),
- Copy = #b_set{op=copy,dst=V,args=[#b_var{name=Msg}]},
+ #b_blk{is=Is0} = Blk0 = map_get(Rm, Blocks1),
+ Copy = #b_set{op=copy,dst=V,args=[Msg]},
Is = insert_after_phis(Is0, [Copy]),
Blk = Blk0#b_blk{is=Is},
Blocks = Blocks1#{Rm:=Blk},
@@ -1013,14 +1355,19 @@ recv_fix_common_1([], [], _Msg, Blocks) -> Blocks.
fix_exit_phi_args([V|Vs], [Rm|Rms], Exit, Blocks) ->
Path = beam_ssa:rpo([Rm], Blocks),
- Pred = exit_predecessor(Path, Exit),
- [{V,Pred}|fix_exit_phi_args(Vs, Rms, Exit, Blocks)];
+ Preds = exit_predecessors(Path, Exit, Blocks),
+ [{V,Pred} || Pred <- Preds] ++ fix_exit_phi_args(Vs, Rms, Exit, Blocks);
fix_exit_phi_args([], [], _, _) -> [].
-exit_predecessor([Pred,Exit|_], Exit) ->
- Pred;
-exit_predecessor([_|Bs], Exit) ->
- exit_predecessor(Bs, Exit).
+exit_predecessors([L|Ls], Exit, Blocks) ->
+ Blk = map_get(L, Blocks),
+ case member(Exit, beam_ssa:successors(Blk)) of
+ true ->
+ [L|exit_predecessors(Ls, Exit, Blocks)];
+ false ->
+ exit_predecessors(Ls, Exit, Blocks)
+ end;
+exit_predecessors([], _Exit, _Blocks) -> [].
%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}.
%% Add a copy instruction for all variables that are matched out and
@@ -1030,16 +1377,14 @@ fix_receive([L|Ls], Defs, Blocks0, Count0) ->
{RmDefs,Used0} = beam_ssa:def_used([L], Blocks0),
Def = ordsets:subtract(Defs, RmDefs),
Used = ordsets:intersection(Def, Used0),
- {NewVs,Count} = new_var_names(Used, Count0),
- NewVars = [#b_var{name=V} || V <- NewVs],
+ {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Used], Count0),
Ren = zip(Used, NewVars),
Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
- #b_blk{is=Is0} = Blk1 = maps:get(L, Blocks1),
- CopyIs = [#b_set{op=copy,dst=New,args=[#b_var{name=Old}]} ||
- {Old,New} <- Ren],
+ #b_blk{is=Is0} = Blk1 = map_get(L, Blocks1),
+ CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
Is = insert_after_phis(Is0, CopyIs),
Blk = Blk1#b_blk{is=Is},
- Blocks = maps:put(L, Blk, Blocks1),
+ Blocks = Blocks1#{L:=Blk},
fix_receive(Ls, Defs, Blocks, Count);
fix_receive([], _Defs, Blocks, Count) ->
{Blocks,Count}.
@@ -1064,7 +1409,7 @@ find_loop_exit_1(_, _, Exit) -> Exit.
find_rm_blocks(L, Blocks) ->
Seen = gb_sets:singleton(L),
- Blk = maps:get(L, Blocks),
+ Blk = map_get(L, Blocks),
Succ = beam_ssa:successors(Blk),
find_rm_blocks_1(Succ, Seen, Blocks).
@@ -1074,7 +1419,7 @@ find_rm_blocks_1([L|Ls], Seen0, Blocks) ->
find_rm_blocks_1(Ls, Seen0, Blocks);
false ->
Seen = gb_sets:insert(L, Seen0),
- Blk = maps:get(L, Blocks),
+ Blk = map_get(L, Blocks),
case find_rm_act(Blk#b_blk.is) of
prune ->
%% Looping back. Don't look at any successors.
@@ -1126,7 +1471,7 @@ find_rm_act([]) ->
find_yregs(#st{frames=[]}=St) ->
St;
find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) ->
- FrameDefs = find_defs(Frames, Blocks0, [V || #b_var{name=V} <- Args]),
+ FrameDefs = find_defs(Frames, Blocks0, [V || #b_var{}=V <- Args]),
Blocks = find_yregs_1(FrameDefs, Blocks0),
St#st{ssa=Blocks}.
@@ -1136,16 +1481,16 @@ find_yregs_1([{F,Defs}|Fs], Blocks0) ->
Ls = beam_ssa:rpo([F], Blocks0),
Yregs0 = [],
Yregs = find_yregs_2(Ls, Blocks0, D0, Yregs0),
- Blk0 = maps:get(F, Blocks0),
+ Blk0 = map_get(F, Blocks0),
Blk = beam_ssa:add_anno(yregs, Yregs, Blk0),
Blocks = Blocks0#{F:=Blk},
find_yregs_1(Fs, Blocks);
find_yregs_1([], Blocks) -> Blocks.
find_yregs_2([L|Ls], Blocks0, D0, Yregs0) ->
- Blk0 = maps:get(L, Blocks0),
+ Blk0 = map_get(L, Blocks0),
#b_blk{is=Is,last=Last} = Blk0,
- Ys0 = maps:get(L, D0),
+ Ys0 = map_get(L, D0),
{Yregs1,Ys} = find_yregs_is(Is, Ys0, Yregs0),
Yregs = find_yregs_terminator(Last, Ys, Yregs1),
Successors = beam_ssa:successors(Blk0),
@@ -1172,7 +1517,7 @@ find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
false ->
Seen1 = gb_sets:insert(L, Seen0),
{Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0),
- #b_blk{is=Is} = Blk = maps:get(L, Blocks),
+ #b_blk{is=Is} = Blk = map_get(L, Blocks),
Defs = find_defs_is(Is, Defs0),
Successors = beam_ssa:successors(Blk),
find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc)
@@ -1181,7 +1526,7 @@ find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
find_defs_1([], _, _, Seen, _, Acc) ->
{Acc,Seen}.
-find_defs_is([#b_set{dst=#b_var{name=Dst}}|Is], Acc) ->
+find_defs_is([#b_set{dst=Dst}|Is], Acc) ->
find_defs_is(Is, [Dst|Acc]);
find_defs_is([], Acc) -> Acc.
@@ -1191,15 +1536,15 @@ find_update_succ([S|Ss], #dk{d=Defs0,k=Killed0}=DK0, D0) ->
Defs = ordsets:intersection(Defs0, Defs1),
Killed = ordsets:union(Killed0, Killed1),
DK = #dk{d=Defs,k=Killed},
- D = maps:put(S, DK, D0),
+ D = D0#{S:=DK},
find_update_succ(Ss, DK0, D);
#{} ->
- D = maps:put(S, DK0, D0),
+ D = D0#{S=>DK0},
find_update_succ(Ss, DK0, D)
end;
find_update_succ([], _, D) -> D.
-find_yregs_is([#b_set{dst=#b_var{name=Dst}}=I|Is], #dk{d=Defs0,k=Killed0}=Ys, Yregs0) ->
+find_yregs_is([#b_set{dst=Dst}=I|Is], #dk{d=Defs0,k=Killed0}=Ys, Yregs0) ->
Used = beam_ssa:used(I),
Yregs1 = ordsets:intersection(Used, Killed0),
Yregs = ordsets:union(Yregs0, Yregs1),
@@ -1284,7 +1629,7 @@ copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Count0}=St) ->
St#st{ssa=Blocks,cnt=Count}.
copy_retval_1([F|Fs], Blocks0, Count0) ->
- #b_blk{anno=#{yregs:=Yregs0},is=Is} = maps:get(F, Blocks0),
+ #b_blk{anno=#{yregs:=Yregs0},is=Is} = map_get(F, Blocks0),
Yregs1 = gb_sets:from_list(Yregs0),
Yregs = collect_yregs(Is, Yregs1),
Ls = beam_ssa:rpo([F], Blocks0),
@@ -1293,7 +1638,7 @@ copy_retval_1([F|Fs], Blocks0, Count0) ->
copy_retval_1([], Blocks, Count) ->
{Blocks,Count}.
-collect_yregs([#b_set{op=copy,dst=#b_var{name=Y},args=[#b_var{name=X}]}|Is],
+collect_yregs([#b_set{op=copy,dst=Y,args=[#b_var{}=X]}|Is],
Yregs0) ->
true = gb_sets:is_member(X, Yregs0), %Assertion.
Yregs = gb_sets:insert(Y, gb_sets:delete(X, Yregs0)),
@@ -1303,7 +1648,7 @@ collect_yregs([#b_set{}|Is], Yregs) ->
collect_yregs([], Yregs) -> Yregs.
copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
- #b_blk{is=Is0,last=Last} = Blk = maps:get(L, Blocks0),
+ #b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0),
RC = case {Last,Ls} of
{#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} ->
true;
@@ -1330,17 +1675,20 @@ copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
Copy, Count, Acc) ->
I = I0#b_set{args=copy_sub_args(Args0, Copy)},
{reverse(Acc, [I|acc_copy([], Copy)]),Count};
+copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
+ when Op =:= call; Op =:= make_fun ->
+ {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
+ {reverse(Acc, [I]),Count};
copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
{reverse(Acc, acc_copy(Is, Copy)),Count};
copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
{reverse(Acc, acc_copy(Is, Copy)),Count};
-copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetVal}=Dst}=I0|Is], RC, Yregs,
+copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetName}=Dst}=I0|Is], RC, Yregs,
Copy0, Count0, Acc0) when Op =:= call; Op =:= make_fun ->
{I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0),
- case gb_sets:is_member(RetVal, Yregs) of
+ case gb_sets:is_member(Dst, Yregs) of
true ->
- {NewVarName,Count} = new_var_name(RetVal, Count1),
- NewVar = #b_var{name=NewVarName},
+ {NewVar,Count} = new_var(RetName, Count1),
Copy = #b_set{op=copy,dst=Dst,args=[NewVar]},
I = I1#b_set{dst=NewVar},
copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]);
@@ -1390,16 +1738,15 @@ copy_retval_is([], RC, _, Copy, Count, Acc) ->
place_retval_copy(I, _Yregs, none, Count, Acc) ->
{I,Count,Acc};
place_retval_copy(#b_set{args=[F|Args0]}=I, Yregs, Copy, Count0, Acc0) ->
- #b_set{dst=#b_var{name=Avoid}} = Copy,
+ #b_set{dst=Avoid} = Copy,
{Args,Acc1,Count} = copy_func_args(Args0, Yregs, Avoid, Acc0, [], Count0),
Acc = [Copy|Acc1],
{I#b_set{args=[F|Args]},Count,Acc}.
-copy_func_args([#b_var{name=V}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) ->
- case gb_sets:is_member(V, Yregs) of
- true when V =/= Avoid ->
- {NewVarName,Count} = new_var_name(V, Count0),
- NewVar = #b_var{name=NewVarName},
+copy_func_args([#b_var{name=AName}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) ->
+ case gb_sets:is_member(A, Yregs) of
+ true when A =/= Avoid ->
+ {NewVar,Count} = new_var(AName, Count0),
Copy = #b_set{op=copy,dst=NewVar,args=[A]},
copy_func_args(As, Yregs, Avoid, [Copy|CopyAcc], [NewVar|Acc], Count);
_ ->
@@ -1443,7 +1790,7 @@ opt_get_list(#st{ssa=Blocks,res=Res}=St) ->
St#st{ssa=opt_get_list_1(Ls, ResMap, Blocks)}.
opt_get_list_1([L|Ls], Res, Blocks0) ->
- #b_blk{is=Is0} = Blk = maps:get(L, Blocks0),
+ #b_blk{is=Is0} = Blk = map_get(L, Blocks0),
case opt_get_list_is(Is0, Res, [], false) of
no ->
opt_get_list_1(Ls, Res, Blocks0);
@@ -1453,9 +1800,9 @@ opt_get_list_1([L|Ls], Res, Blocks0) ->
end;
opt_get_list_1([], _, Blocks) -> Blocks.
-opt_get_list_is([#b_set{op=get_hd,dst=#b_var{name=Hd},
+opt_get_list_is([#b_set{op=get_hd,dst=Hd,
args=[Cons]}=GetHd,
- #b_set{op=get_tl,dst=#b_var{name=Tl},
+ #b_set{op=get_tl,dst=Tl,
args=[Cons]}=GetTl|Is],
Res, Acc, Changed) ->
%% Note that when this pass is run, only Y registers have
@@ -1497,12 +1844,12 @@ number_instructions(#st{ssa=Blocks0}=St) ->
St#st{ssa=number_is_1(Ls, 1, Blocks0)}.
number_is_1([L|Ls], N0, Blocks0) ->
- #b_blk{is=Is0,last=Last0} = Bl0 = maps:get(L, Blocks0),
+ #b_blk{is=Is0,last=Last0} = Bl0 = map_get(L, Blocks0),
{Is,N1} = number_is_2(Is0, N0, []),
Last = beam_ssa:add_anno(n, N1, Last0),
N = N1 + 2,
Bl = Bl0#b_blk{is=Is,last=Last},
- Blocks = maps:put(L, Bl, Blocks0),
+ Blocks = Blocks0#{L:=Bl},
number_is_1(Ls, N, Blocks);
number_is_1([], _, Blocks) -> Blocks.
@@ -1519,13 +1866,13 @@ number_is_2([], N, Acc) ->
%%%
live_intervals(#st{args=Args,ssa=Blocks}=St) ->
- Vars0 = [{V,{0,1}} || #b_var{name=V} <- Args],
+ Vars0 = [{V,{0,1}} || #b_var{}=V <- Args],
F = fun(L, _, A) -> live_interval_blk(L, Blocks, A) end,
LiveMap0 = #{},
- Acc0 = {[],[],LiveMap0},
- {Vars,Aliases,_} = beam_ssa:fold_po(F, Acc0, Blocks),
+ Acc0 = {[],LiveMap0},
+ {Vars,_} = beam_ssa:fold_po(F, Acc0, Blocks),
Intervals = merge_ranges(rel2fam(Vars0++Vars)),
- St#st{intervals=Intervals,aliases=Aliases}.
+ St#st{intervals=Intervals}.
merge_ranges([{V,Rs}|T]) ->
[{V,merge_ranges_1(Rs)}|merge_ranges(T)];
@@ -1537,20 +1884,19 @@ merge_ranges_1([R|Rs]) ->
[R|merge_ranges_1(Rs)];
merge_ranges_1([]) -> [].
-live_interval_blk(L, Blocks, {Vars0,Aliases0,LiveMap0}) ->
+live_interval_blk(L, Blocks, {Vars0,LiveMap0}) ->
Live0 = [],
Successors = beam_ssa:successors(L, Blocks),
Live1 = update_successors(Successors, L, Blocks, LiveMap0, Live0),
%% Add ranges for all variables that are live in the successors.
- #b_blk{is=Is,last=Last} = maps:get(L, Blocks),
+ #b_blk{is=Is,last=Last} = map_get(L, Blocks),
End = beam_ssa:get_anno(n, Last),
Use = [{V,{use,End+1}} || V <- Live1],
%% Determine used and defined variables in this block.
FirstNumber = first_number(Is, Last),
- {UseDef0,Aliases} = live_interval_blk_1([Last|reverse(Is)],
- FirstNumber, Aliases0, Use),
+ UseDef0 = live_interval_blk_1([Last|reverse(Is)], FirstNumber, Use),
UseDef = rel2fam(UseDef0),
%% Update what is live at the beginning of this block and
@@ -1563,7 +1909,7 @@ live_interval_blk(L, Blocks, {Vars0,Aliases0,LiveMap0}) ->
%% Construct the ranges for this block.
Vars = make_block_ranges(UseDef, FirstNumber, Vars0),
- {Vars,Aliases,LiveMap}.
+ {Vars,LiveMap}.
make_block_ranges([{V,[{def,Def}]}|Vs], First, Acc) ->
make_block_ranges(Vs, First, [{V,{Def,Def}}|Acc]);
@@ -1575,37 +1921,29 @@ make_block_ranges([{V,[{use,_}|_]=Uses}|Vs], First, Acc) ->
make_block_ranges(Vs, First, [{V,{First,Last}}|Acc]);
make_block_ranges([], _, Acc) -> Acc.
-live_interval_blk_1([#b_set{op=phi,dst=#b_var{name=Dst}}|Is],
- FirstNumber, Aliases, Acc0) ->
+live_interval_blk_1([#b_set{op=phi,dst=Dst}|Is], FirstNumber, Acc0) ->
Acc = [{Dst,{def,FirstNumber}}|Acc0],
- live_interval_blk_1(Is, FirstNumber, Aliases, Acc);
-live_interval_blk_1([#b_set{op=bs_start_match}=I|Is], FirstNumber,
- Aliases0, Acc0) ->
+ live_interval_blk_1(Is, FirstNumber, Acc);
+live_interval_blk_1([#b_set{op=bs_start_match}=I|Is],
+ FirstNumber, Acc0) ->
N = beam_ssa:get_anno(n, I),
- #b_set{dst=#b_var{name=Dst}} = I,
+ #b_set{dst=Dst} = I,
Acc1 = [{Dst,{def,N}}|Acc0],
- Aliases = case beam_ssa:get_anno(reuse_for_context, I) of
- true ->
- #b_set{args=[#b_var{name=Src}]} = I,
- [{Dst,Src}|Aliases0];
- false ->
- Aliases0
- end,
Acc = [{V,{use,N}} || V <- beam_ssa:used(I)] ++ Acc1,
- live_interval_blk_1(Is, FirstNumber, Aliases, Acc);
-live_interval_blk_1([I|Is], FirstNumber, Aliases, Acc0) ->
+ live_interval_blk_1(Is, FirstNumber, Acc);
+live_interval_blk_1([I|Is], FirstNumber, Acc0) ->
N = beam_ssa:get_anno(n, I),
Acc1 = case I of
- #b_set{dst=#b_var{name=Dst}} ->
+ #b_set{dst=Dst} ->
[{Dst,{def,N}}|Acc0];
_ ->
Acc0
end,
Used = beam_ssa:used(I),
Acc = [{V,{use,N}} || V <- Used] ++ Acc1,
- live_interval_blk_1(Is, FirstNumber, Aliases, Acc);
-live_interval_blk_1([], _FirstNumber, Aliases, Acc) ->
- {Acc,Aliases}.
+ live_interval_blk_1(Is, FirstNumber, Acc);
+live_interval_blk_1([], _FirstNumber, Acc) ->
+ Acc.
%% first_number([#b_set{}]) -> InstructionNumber.
%% Return the number for the first instruction for the block.
@@ -1621,7 +1959,7 @@ first_number([], Last) ->
update_successors([L|Ls], Pred, Blocks, LiveMap, Live0) ->
Live1 = ordsets:union(Live0, get_live(L, LiveMap)),
- #b_blk{is=Is} = maps:get(L, Blocks),
+ #b_blk{is=Is} = map_get(L, Blocks),
Live = update_live_phis(Is, Pred, Live1),
update_successors(Ls, Pred, Blocks, LiveMap, Live);
update_successors([], _, _, _, Live) -> Live.
@@ -1632,9 +1970,9 @@ get_live(L, LiveMap) ->
#{} -> []
end.
-update_live_phis([#b_set{op=phi,dst=#b_var{name=Killed},args=Args}|Is],
+update_live_phis([#b_set{op=phi,dst=Killed,args=Args}|Is],
Pred, Live0) ->
- Used = [V || {#b_var{name=V},L} <- Args, L =:= Pred],
+ Used = [V || {#b_var{}=V,L} <- Args, L =:= Pred],
Live1 = ordsets:union(ordsets:from_list(Used), Live0),
Live = ordsets:del_element(Killed, Live1),
update_live_phis(Is, Pred, Live);
@@ -1647,7 +1985,7 @@ update_live_phis(_, _, Live) -> Live.
%% reserve_yregs(St0) -> St.
%% In each block that allocates a stack frame, insert instructions
%% that copy variables that must be in Y registers (given by
-%% YRegisters) to new variables.
+%% the `yregs` annotation) to new variables.
%%
%% Also allocate specific Y registers for try and catch tags.
%% The outermost try/catch tag is placed in y0, any directly
@@ -1659,7 +1997,7 @@ reserve_yregs(#st{frames=Frames}=St0) ->
foldl(fun reserve_yregs_1/2, St0, Frames).
reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
- Blk = maps:get(L, Blocks0),
+ Blk = map_get(L, Blocks0),
Yregs = beam_ssa:get_anno(yregs, Blk),
{Def,Used} = beam_ssa:def_used([L], Blocks0),
UsedYregs = ordsets:intersection(Yregs, Used),
@@ -1685,7 +2023,7 @@ reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) ->
reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0);
false ->
Seen1 = gb_sets:insert(L, Seen0),
- #b_blk{is=Is} = Blk = maps:get(L, Blocks),
+ #b_blk{is=Is} = Blk = map_get(L, Blocks),
Active0 = get_active(L, ActMap0),
Active = reserve_try_tags_is(Is, Active0),
Successors = beam_ssa:successors(Blk),
@@ -1702,10 +2040,10 @@ get_active(L, ActMap) ->
#{} -> #{}
end.
-reserve_try_tags_is([#b_set{op=new_try_tag,dst=#b_var{name=V}}|Is], Active) ->
+reserve_try_tags_is([#b_set{op=new_try_tag,dst=V}|Is], Active) ->
N = map_size(Active),
reserve_try_tags_is(Is, Active#{V=>N});
-reserve_try_tags_is([#b_set{op=kill_try_tag,args=[#b_var{name=Tag}]}|Is], Active) ->
+reserve_try_tags_is([#b_set{op=kill_try_tag,args=[Tag]}|Is], Active) ->
reserve_try_tags_is(Is, maps:remove(Tag, Active));
reserve_try_tags_is([_|Is], Active) ->
reserve_try_tags_is(Is, Active);
@@ -1725,17 +2063,15 @@ update_act_map([], _, ActMap) -> ActMap.
rename_vars([], _, Blocks, Count) ->
{[],Blocks,Count};
rename_vars(Vs, L, Blocks0, Count0) ->
- {NewVs,Count} = new_var_names(Vs, Count0),
- NewVars = [#b_var{name=V} || V <- NewVs],
+ {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Vs], Count0),
Ren = zip(Vs, NewVars),
Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
- #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks1),
- CopyIs = [#b_set{op=copy,dst=New,args=[#b_var{name=Old}]} ||
- {Old,New} <- Ren],
+ #b_blk{is=Is0} = Blk0 = map_get(L, Blocks1),
+ CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
Is = insert_after_phis(Is0, CopyIs),
Blk = Blk0#b_blk{is=Is},
- Blocks = maps:put(L, Blk, Blocks1),
- {NewVs,Blocks,Count}.
+ Blocks = Blocks1#{L:=Blk},
+ {NewVars,Blocks,Count}.
insert_after_phis([#b_set{op=phi}=I|Is], InsertIs) ->
[I|insert_after_phis(Is, InsertIs)];
@@ -1756,7 +2092,7 @@ frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) ->
frame_size_1(L, Regs, Blocks0) ->
Def = beam_ssa:def([L], Blocks0),
- Yregs0 = [maps:get(V, Regs) || V <- Def, is_yreg(maps:get(V, Regs))],
+ Yregs0 = [map_get(V, Regs) || V <- Def, is_yreg(map_get(V, Regs))],
Yregs = ordsets:from_list(Yregs0),
FrameSize = length(ordsets:from_list(Yregs)),
if
@@ -1768,17 +2104,17 @@ frame_size_1(L, Regs, Blocks0) ->
true ->
ok
end,
- Blk0 = maps:get(L, Blocks0),
+ Blk0 = map_get(L, Blocks0),
Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0),
%% Insert an annotation for frame deallocation on
%% each #b_ret{}.
- Blocks = maps:put(L, Blk, Blocks0),
+ Blocks = Blocks0#{L:=Blk},
Reachable = beam_ssa:rpo([L], Blocks),
frame_deallocate(Reachable, FrameSize, Blocks).
frame_deallocate([L|Ls], Size, Blocks0) ->
- Blk0 = maps:get(L, Blocks0),
+ Blk0 = map_get(L, Blocks0),
Blk = case Blk0 of
#b_blk{last=#b_ret{}=Ret0} ->
Ret = beam_ssa:add_anno(deallocate, Size, Ret0),
@@ -1786,7 +2122,7 @@ frame_deallocate([L|Ls], Size, Blocks0) ->
#b_blk{} ->
Blk0
end,
- Blocks = maps:put(L, Blk, Blocks0),
+ Blocks = Blocks0#{L:=Blk},
frame_deallocate(Ls, Size, Blocks);
frame_deallocate([], _, Blocks) -> Blocks.
@@ -1799,7 +2135,7 @@ frame_deallocate([], _, Blocks) -> Blocks.
turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
Regs1 = foldl(fun(L, A) ->
- Blk = maps:get(L, Blocks),
+ Blk = map_get(L, Blocks),
FrameSize = beam_ssa:get_anno(frame_size, Blk),
Def = beam_ssa:def([L], Blocks),
[turn_yregs_1(Def, FrameSize, Regs0)|A]
@@ -1808,7 +2144,7 @@ turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
St#st{regs=Regs}.
turn_yregs_1(Def, FrameSize, Regs) ->
- Yregs0 = [{maps:get(V, Regs),V} || V <- Def, is_yreg(maps:get(V, Regs))],
+ Yregs0 = [{map_get(V, Regs),V} || V <- Def, is_yreg(map_get(V, Regs))],
Yregs1 = rel2fam(Yregs0),
FrameSize = length(Yregs1),
Yregs2 = [{{y,FrameSize-Y-1},Vs} || {{y,Y},Vs} <- Yregs1],
@@ -1842,7 +2178,7 @@ reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) ->
Res = maps:from_list(Res3),
St#st{res=reserve_xregs(Blocks, Res)}.
-reserve_arg_regs([#b_var{name=Arg}|Is], N, Acc) ->
+reserve_arg_regs([#b_var{}=Arg|Is], N, Acc) ->
reserve_arg_regs(Is, N+1, [{Arg,{x,N}}|Acc]);
reserve_arg_regs([], _, Acc) -> Acc.
@@ -1854,25 +2190,42 @@ reserve_zregs(Blocks, Intervals, Res) ->
end,
beam_ssa:fold_rpo(F, [0], Res, Blocks).
+reserve_zreg([#b_set{op=Op,dst=Dst}],
+ #b_br{bool=Dst}, _ShortLived, A) when Op =:= call;
+ Op =:= get_tuple_element ->
+ %% If type optimization has determined that the result of these
+ %% instructions can be used directly in a branch, we must avoid reserving a
+ %% z register or code generation will fail.
+ A;
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
- #b_set{op={bif,'=:='},args=[Dst,Val]}], _Last, ShortLived, A0) ->
- case Val of
- #b_literal{val=Arity} when Arity bsr 32 =:= 0 ->
+ #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
+ case {Val,Last} of
+ {#b_literal{val=Arity},#b_br{bool=#b_var{}}} when Arity bsr 32 =:= 0 ->
%% These two instructions can be combined to a test_arity
%% instruction provided that the arity variable is short-lived.
reserve_zreg_1(Dst, ShortLived, A0);
- _ ->
+ {_,_} ->
+ %% Either the arity is too big, or the boolean value is not
+ %% used in a conditional branch.
A0
end;
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
#b_switch{}, ShortLived, A) ->
reserve_zreg_1(Dst, ShortLived, A);
-reserve_zreg([#b_set{op=Op,dst=#b_var{name=Dst}}|Is], Last, ShortLived, A0) ->
+reserve_zreg([#b_set{op={bif,'xor'}}], _Last, _ShortLived, A) ->
+ %% There is no short, easy way to rewrite 'xor' to a series of
+ %% test instructions.
+ A;
+reserve_zreg([#b_set{op={bif,is_record}}], _Last, _ShortLived, A) ->
+ %% There is no short, easy way to rewrite is_record/2 to a series of
+ %% test instructions.
+ A;
+reserve_zreg([#b_set{op=Op,dst=Dst}|Is], Last, ShortLived, A0) ->
IsZReg = case Op of
- context_to_binary -> true;
bs_match_string -> true;
- bs_restore -> true;
bs_save -> true;
+ bs_restore -> true;
+ bs_set_position -> true;
{float,clearerror} -> true;
kill_try_tag -> true;
landingpad -> true;
@@ -1893,7 +2246,7 @@ reserve_zreg([], #b_br{bool=Bool}, ShortLived, A) ->
reserve_zreg_1(Bool, ShortLived, A);
reserve_zreg([], _, _, A) -> A.
-reserve_zreg_1(#b_var{name=V}, ShortLived, A) ->
+reserve_zreg_1(#b_var{}=V, ShortLived, A) ->
case cerl_sets:is_element(V, ShortLived) of
true -> [{V,z}|A];
false -> A
@@ -1906,7 +2259,7 @@ reserve_fregs(Blocks, Res) ->
end,
beam_ssa:fold_rpo(F, [0], Res, Blocks).
-reserve_freg([#b_set{op={float,Op},dst=#b_var{name=V}}|Is], Res) ->
+reserve_freg([#b_set{op={float,Op},dst=V}|Is], Res) ->
case Op of
get ->
reserve_freg(Is, Res);
@@ -1931,23 +2284,95 @@ reserve_freg([], Res) -> Res.
%% will allocate the lowest free X register for the variable.
reserve_xregs(Blocks, Res) ->
- F = fun(L, #b_blk{is=Is,last=Last}, R) ->
- {Xs0,Used0} = reserve_terminator(L, Last, Blocks, R),
- reserve_xregs_is(reverse(Is), R, Xs0, Used0)
- end,
- beam_ssa:fold_po(F, Res, Blocks).
-
-reserve_xregs_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args}=I|Is], Res0, Xs0, Used0) ->
- Xs1 = case is_gc_safe(I) of
- true ->
- Xs0;
- false ->
- %% There may be a garbage collection after executing this
- %% instruction. We will need prune the list of preferred
- %% X registers.
- res_xregs_prune(Xs0, Used0, Res0)
- end,
- Res = reserve_xreg(Dst, Xs1, Res0),
+ Ls = reverse(beam_ssa:rpo(Blocks)),
+ reserve_xregs(Ls, Blocks, #{}, Res).
+
+reserve_xregs([L|Ls], Blocks, XsMap0, Res0) ->
+ #b_blk{anno=Anno,is=Is0,last=Last} = map_get(L, Blocks),
+
+ %% Calculate mapping from variable name to the preferred
+ %% register.
+ Xs0 = reserve_terminator(L, Is0, Last, Blocks, XsMap0, Res0),
+
+ %% We need to figure out where the code generator will
+ %% place instructions that will do a garbage collection.
+ %% Insert 'gc' markers as pseudo-instructions in the
+ %% instruction sequence.
+ Is1 = reverse(Is0),
+ Is2 = res_place_gc_instrs(Is1, []),
+ Is = res_place_allocate(Anno, Is2),
+
+ %% Add register hints for variables that are defined
+ %% in the (reversed) instruction sequence.
+ {Res,Xs} = reserve_xregs_is(Is, Res0, Xs0, []),
+
+ XsMap = XsMap0#{L=>Xs},
+ reserve_xregs(Ls, Blocks, XsMap, Res);
+reserve_xregs([], _, _, Res) -> Res.
+
+%% Insert explicit 'gc' markers points where there will
+%% be a garbage collection. (Note that the instruction
+%% sequence passed to this function is reversed.)
+
+res_place_gc_instrs([#b_set{op=phi}=I|Is], Acc) ->
+ res_place_gc_instrs(Is, [I|Acc]);
+res_place_gc_instrs([#b_set{op=Op}=I|Is], Acc)
+ when Op =:= call; Op =:= make_fun ->
+ case Acc of
+ [] ->
+ res_place_gc_instrs(Is, [I|Acc]);
+ [GC|_] when GC =:= gc; GC =:= test_heap ->
+ res_place_gc_instrs(Is, [I,gc|Acc]);
+ [_|_] ->
+ res_place_gc_instrs(Is, [I,gc|Acc])
+ end;
+res_place_gc_instrs([#b_set{op=Op,args=Args}=I|Is], Acc0) ->
+ case beam_ssa_codegen:classify_heap_need(Op, Args) of
+ neutral ->
+ case Acc0 of
+ [test_heap|Acc] ->
+ res_place_gc_instrs(Is, [test_heap,I|Acc]);
+ Acc ->
+ res_place_gc_instrs(Is, [I|Acc])
+ end;
+ {put,_} ->
+ case Acc0 of
+ [test_heap|Acc] ->
+ res_place_gc_instrs(Is, [test_heap,I|Acc]);
+ Acc ->
+ res_place_gc_instrs(Is, [test_heap,I|Acc])
+ end;
+ _ ->
+ res_place_gc_instrs(Is, [gc,I|Acc0])
+ end;
+res_place_gc_instrs([], Acc) ->
+ %% Reverse and replace 'test_heap' markers with 'gc'.
+ %% (The distinction is no longer useful.)
+ res_place_gc_instrs_rev(Acc, []).
+
+res_place_gc_instrs_rev([test_heap|Is], [gc|_]=Acc) ->
+ res_place_gc_instrs_rev(Is, Acc);
+res_place_gc_instrs_rev([test_heap|Is], Acc) ->
+ res_place_gc_instrs_rev(Is, [gc|Acc]);
+res_place_gc_instrs_rev([gc|Is], [gc|_]=Acc) ->
+ res_place_gc_instrs_rev(Is, Acc);
+res_place_gc_instrs_rev([I|Is], Acc) ->
+ res_place_gc_instrs_rev(Is, [I|Acc]);
+res_place_gc_instrs_rev([], Acc) -> Acc.
+
+res_place_allocate(#{yregs:=_}, Is) ->
+ %% There will be an 'allocate' instruction inserted here.
+ Is ++ [gc];
+res_place_allocate(#{}, Is) -> Is.
+
+reserve_xregs_is([gc|Is], Res, Xs0, Used) ->
+ %% At this point, the code generator will place an instruction
+ %% that does a garbage collection. We must prune the remembered
+ %% registers.
+ Xs = res_xregs_prune(Xs0, Used, Res),
+ reserve_xregs_is(Is, Res, Xs, Used);
+reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
+ Res = reserve_xreg(Dst, Xs0, Res0),
Used1 = ordsets:union(Used0, beam_ssa:used(I)),
Used = ordsets:del_element(Dst, Used1),
case Op of
@@ -1958,28 +2383,74 @@ reserve_xregs_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args}=I|Is], Res0, Xs0,
Xs = reserve_call_args(tl(Args)),
reserve_xregs_is(Is, Res, Xs, Used);
_ ->
- reserve_xregs_is(Is, Res, Xs1, Used)
+ reserve_xregs_is(Is, Res, Xs0, Used)
end;
-reserve_xregs_is([], Res, _Xs, _Used) -> Res.
-
-reserve_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}, Blocks, Res) ->
- case maps:get(Succ, Blocks) of
+reserve_xregs_is([], Res, Xs, _Used) ->
+ {Res,Xs}.
+
+%% Pick up register hints from the successors of this blocks.
+reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?BADARG_BLOCK},
+ _Blocks, XsMap, _Res) ->
+ %% We know that no variables are used at ?BADARG_BLOCK, so
+ %% any register hints from the success blocks are safe to use.
+ map_get(Succ, XsMap);
+reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail},
+ Blocks, XsMap, Res) when Succ =/= Fail ->
+ #{Succ:=SuccBlk,Fail:=FailBlk} = Blocks,
+ case {SuccBlk,FailBlk} of
+ {#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}},
+ #b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}} ->
+ %% Both branches ultimately transfer to the same
+ %% block (via two blocks with no instructions).
+ %% Pick up register hints from the phi nodes
+ %% in the common block.
+ #{PhiL:=#b_blk{is=PhiIs}} = Blocks,
+ Xs = res_xregs_from_phi(PhiIs, Succ, Res, #{}),
+ res_xregs_from_phi(PhiIs, Fail, Res, Xs);
+ {_,_} when Is =/= [] ->
+ case last(Is) of
+ #b_set{op=succeeded,args=[Arg]} ->
+ %% We know that Arg will not be used at the failure
+ %% label, so we can pick up register hints from the
+ %% success label.
+ Br = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
+ case reserve_terminator(L, [], Br, Blocks, XsMap, Res) of
+ #{Arg:=Reg} -> #{Arg=>Reg};
+ #{} -> #{}
+ end;
+ _ ->
+ %% Register hints from the success block may not
+ %% be safe at the failure block, and vice versa.
+ #{}
+ end;
+ {_,_} ->
+ %% Register hints from the success block may not
+ %% be safe at the failure block, and vice versa.
+ #{}
+ end;
+reserve_terminator(L, Is, #b_br{bool=#b_literal{val=true},succ=Succ},
+ Blocks, XsMap, Res) ->
+ case map_get(Succ, Blocks) of
#b_blk{is=[],last=Last} ->
- reserve_terminator(Succ, Last, Blocks, Res);
- #b_blk{is=[_|_]=Is} ->
- {res_xregs_from_phi(Is, L, Res, #{}),[]}
+ reserve_terminator(Succ, Is, Last, Blocks, XsMap, Res);
+ #b_blk{is=[_|_]=PhiIs} ->
+ res_xregs_from_phi(PhiIs, L, Res, #{})
end;
-reserve_terminator(_, Last, _, _) ->
- {#{},beam_ssa:used(Last)}.
+reserve_terminator(_, _, _, _, _, _) -> #{}.
-res_xregs_from_phi([#b_set{op=phi,dst=#b_var{name=Dst},args=Args}|Is],
+%% Pick up a reservation from a phi node.
+res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is],
Pred, Res, Acc) ->
- case [V || {#b_var{name=V},L} <- Args, L =:= Pred] of
+ case [V || {#b_var{}=V,L} <- Args, L =:= Pred] of
[] ->
+ %% The value of the phi node for this predecessor
+ %% is a literal. Nothing to do here.
res_xregs_from_phi(Is, Pred, Res, Acc);
[V] ->
case Res of
#{Dst:={prefer,Reg}} ->
+ %% Try placing V in the same register as for
+ %% the phi node.
res_xregs_from_phi(Is, Pred, Res, Acc#{V=>Reg});
#{Dst:=_} ->
res_xregs_from_phi(Is, Pred, Res, Acc)
@@ -1990,8 +2461,8 @@ res_xregs_from_phi(_, _, _, Acc) -> Acc.
reserve_call_args(Args) ->
reserve_call_args(Args, 0, #{}).
-reserve_call_args([#b_var{name=Name}|As], X, Xs) ->
- reserve_call_args(As, X+1, Xs#{Name=>{x,X}});
+reserve_call_args([#b_var{}=Var|As], X, Xs) ->
+ reserve_call_args(As, X+1, Xs#{Var=>{x,X}});
reserve_call_args([#b_literal{}|As], X, Xs) ->
reserve_call_args(As, X+1, Xs);
reserve_call_args([], _, Xs) -> Xs.
@@ -1999,12 +2470,12 @@ reserve_call_args([], _, Xs) -> Xs.
reserve_xreg(V, Xs, Res) ->
case Res of
#{V:=_} ->
- %% Already reserved.
+ %% Already reserved (but not as an X register).
Res;
#{} ->
case Xs of
#{V:=X} ->
- %% Add a hint that a specific X register is
+ %% Add a hint that this specific X register is
%% preferred, unless it is already in use.
Res#{V=>{prefer,X}};
#{} ->
@@ -2013,23 +2484,15 @@ reserve_xreg(V, Xs, Res) ->
end
end.
-is_gc_safe(#b_set{op=phi}) ->
- false;
-is_gc_safe(#b_set{op=Op,args=Args}) ->
- case beam_ssa_codegen:classify_heap_need(Op, Args) of
- neutral -> true;
- {put,_} -> true;
- _ -> false
- end.
-
%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs.
-%% Prune the list of preferred to only include X registers that
-%% are guaranteed to survice a garbage collection.
+%% Prune the list of preferred registers, to make sure that
+%% there are no "holes" (uninitialized X registers) when
+%% invoking the garbage collector.
-res_xregs_prune(Xs, Used, Res) ->
+res_xregs_prune(Xs, Used, Res) when map_size(Xs) =/= 0 ->
%% The number of safe registers is the number of the X registers
%% used after this point. The actual number of safe registers may
- %% be highter than this number, but this is a conservative safe
+ %% be higher than this number, but this is a conservative safe
%% estimate.
NumSafe = foldl(fun(V, N) ->
case Res of
@@ -2041,83 +2504,8 @@ res_xregs_prune(Xs, Used, Res) ->
%% Remove unsafe registers from the list of potential
%% preferred registers.
- maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs).
-
-%%%
-%%% Remove unsuitable aliases.
-%%%
-%%% If a binary is matched more than once, we must not put the
-%%% the match context in the same register as the binary to
-%%% avoid the following situation:
-%%%
-%%% {test,bs_start_match2,{f,3},1,[{x,0},0],{x,0}}.
-%%% .
-%%% .
-%%% .
-%%% {test,bs_start_match2,{f,6},1,[{x,0},0],{x,1}}. %% ILLEGAL!
-%%%
-%%% The second instruction is illegal because a match context source
-%%% is only allowed if source and destination registers are identical.
-%%%
-
-remove_unsuitable_aliases(#st{aliases=[_|_]=Aliases0,ssa=Blocks}=St) ->
- R = rem_unsuitable(maps:values(Blocks)),
- Unsuitable0 = [V || {V,[_,_|_]} <- rel2fam(R)],
- Unsuitable = gb_sets:from_list(Unsuitable0),
- Aliases =[P || {_,V}=P <- Aliases0,
- not gb_sets:is_member(V, Unsuitable)],
- St#st{aliases=Aliases};
-remove_unsuitable_aliases(#st{aliases=[]}=St) -> St.
-
-rem_unsuitable([#b_blk{is=Is}|Bs]) ->
- Vs = [{V,Dst} ||
- #b_set{op=bs_start_match,dst=#b_var{name=Dst},
- args=[#b_var{name=V}]} <- Is],
- Vs ++ rem_unsuitable(Bs);
-rem_unsuitable([]) -> [].
-
-%%%
-%%% Merge intervals.
-%%%
-
-merge_intervals(#st{aliases=Aliases0,intervals=Intervals0,
- res=Reserved}=St) ->
- Aliases1 = [A || A <- Aliases0,
- is_suitable_alias(A, Reserved)],
- case Aliases1 of
- [] ->
- St#st{aliases=Aliases1};
- [_|_] ->
- Intervals1 = maps:from_list(Intervals0),
- {Intervals,Aliases} =
- merge_intervals_1(Aliases1, Intervals1, []),
- St#st{aliases=Aliases,intervals=Intervals}
- end.
-
-merge_intervals_1([{Alias,V}|Vs], Intervals0, Acc) ->
- #{Alias:=Int1,V:=Int2} = Intervals0,
- Int3 = lists:merge(Int1, Int2),
- Int = merge_intervals_2(Int3),
- Intervals1 = maps:remove(Alias, Intervals0),
- Intervals = Intervals1#{V:=Int},
- merge_intervals_1(Vs, Intervals, [{Alias,V}|Acc]);
-merge_intervals_1([], Intervals, Acc) ->
- {maps:to_list(Intervals),Acc}.
-
-merge_intervals_2([{A1,B1},{A2,B2}|Is]) when A2 =< B1 ->
- merge_intervals_2([{min(A1, A2),max(B1, B2)}|Is]);
-merge_intervals_2([{_A1,B1}=R|[{A2,_B2}|_]=Is]) when B1 < A2 ->
- [R|merge_intervals_2(Is)];
-merge_intervals_2([_]=Is) -> Is.
-
-is_suitable_alias({V1,V2}, Reserved) ->
- #{V1:=Res1,V2:=Res2} = Reserved,
- case {Res1,Res2} of
- {x,x} -> true;
- {x,{x,_}} -> true;
- {{x,_},x} -> true;
- {_,_} -> false
- end.
+ maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs);
+res_xregs_prune(Xs, _Used, _Res) -> Xs.
%%%
%%% Register allocation using linear scan.
@@ -2152,7 +2540,13 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
Free = init_free(maps:to_list(Res)),
Intervals1 = [init_interval(Int, Res) || Int <- Intervals0],
Intervals = sort(Intervals1),
- IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end,
+ IsReserved = fun(#i{reg=Reg}) ->
+ case Reg of
+ none -> false;
+ {prefer,{_,_}} -> false;
+ {_,_} -> true
+ end
+ end,
{UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
L = #l{unhandled_res=UnhandledRes,
unhandled_any=Unhandled,free=Free},
@@ -2160,7 +2554,7 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
St#st{regs=maps:from_list(Regs)}.
init_interval({V,[{Start,_}|_]=Rs}, Res) ->
- Info = maps:get(V, Res),
+ Info = map_get(V, Res),
Pool = case Info of
{prefer,{x,_}} -> x;
x -> x;
@@ -2361,16 +2755,16 @@ free_reg(#i{reg={_,_}=Reg}=I, L) ->
update_pool(I, FreeRegs, L).
get_pool(#i{pool=Pool}, #l{free=Free}) ->
- maps:get(Pool, Free).
+ map_get(Pool, Free).
update_pool(#i{pool=Pool}, New, #l{free=Free0}=L) ->
- Free = maps:put(Pool, New, Free0),
+ Free = Free0#{Pool:=New},
L#l{free=Free}.
get_next_free(#i{pool=Pool}, #l{free=Free0}=L0) ->
K = {next,Pool},
- N = maps:get(K, Free0),
- Free = maps:put(K, N+1, Free0),
+ N = map_get(K, Free0),
+ Free = Free0#{K:=N+1},
L = L0#l{free=Free},
if
is_integer(Pool) -> {{y,N},L};
@@ -2406,7 +2800,7 @@ are_overlapping_1({_,_}, []) -> false.
is_loop_header(L, Blocks) ->
%% We KNOW that a loop header must start with a peek_message
%% instruction.
- case maps:get(L, Blocks) of
+ case map_get(L, Blocks) of
#b_blk{is=[#b_set{op=peek_message}|_]} -> true;
_ -> false
end.
@@ -2417,21 +2811,21 @@ rel2fam(S0) ->
sofs:to_external(S).
split_phis(Is) ->
- partition(fun(#b_set{op=Op}) -> Op =:= phi end, Is).
+ splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is).
is_yreg({y,_}) -> true;
is_yreg({x,_}) -> false;
is_yreg({z,_}) -> false;
is_yreg({fr,_}) -> false.
-new_var_names([V0|Vs0], Count0) ->
- {V,Count1} = new_var_name(V0, Count0),
- {Vs,Count} = new_var_names(Vs0, Count1),
+new_vars([Base|Vs0], Count0) ->
+ {V,Count1} = new_var(Base, Count0),
+ {Vs,Count} = new_vars(Vs0, Count1),
{[V|Vs],Count};
-new_var_names([], Count) -> {[],Count}.
+new_vars([], Count) -> {[],Count}.
-new_var_name({Base,Int}, Count) ->
+new_var({Base,Int}, Count) ->
true = is_integer(Int), %Assertion.
- {{Base,Count},Count+1};
-new_var_name(Base, Count) ->
- {{Base,Count},Count+1}.
+ {#b_var{name={Base,Count}},Count+1};
+new_var(Base, Count) ->
+ {#b_var{name={Base,Count}},Count+1}.