diff options
Diffstat (limited to 'lib/compiler/src/beam_ssa_pre_codegen.erl')
-rw-r--r-- | lib/compiler/src/beam_ssa_pre_codegen.erl | 1084 |
1 files changed, 739 insertions, 345 deletions
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index bbc3739eb5..bad43a9c4e 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -23,11 +23,10 @@ %% it has been annotated and transformed to help the code generator. %% %% * Some instructions are translated to other instructions closer to -%% the BEAM instructions. For example, the put_tuple instruction is -%% broken apart into the put_tuple_arity and put_tuple_elements -%% instructions. Similary, the binary matching instructions are -%% transformed from the optimization-friendly internal format to -%% instruction more similar to the actual BEAM instructions. +%% the BEAM instructions. For example, the binary matching +%% instructions are transformed from the optimization-friendly +%% internal format to instruction more similar to the actual BEAM +%% instructions. %% %% * Blocks that will need an instruction for allocating a stack frame %% are annotated with a {frame_size,Size} annotation. @@ -73,20 +72,20 @@ -import(lists, [all/2,any/2,append/1,duplicate/2, foldl/3,last/1,map/2,member/2,partition/2, - reverse/1,reverse/2,sort/1,zip/2]). + reverse/1,reverse/2,sort/1,splitwith/2,zip/2]). -spec module(beam_ssa:b_module(), [compile:option()]) -> {'ok',beam_ssa:b_module()}. module(#b_module{body=Fs0}=Module, Opts) -> - ExtraAnnos = proplists:get_bool(dprecg, Opts), - Ps = passes(ExtraAnnos), - Fs = functions(Fs0, Ps), + UseBSM3 = not proplists:get_bool(no_bsm3, Opts), + Ps = passes(Opts), + Fs = functions(Fs0, Ps, UseBSM3), {ok,Module#b_module{body=Fs}}. -functions([F|Fs], Ps) -> - [function(F, Ps)|functions(Fs, Ps)]; -functions([], _Ps) -> []. +functions([F|Fs], Ps, UseBSM3) -> + [function(F, Ps, UseBSM3)|functions(Fs, Ps, UseBSM3)]; +functions([], _Ps, _UseBSM3) -> []. -type b_var() :: beam_ssa:b_var(). -type var_name() :: beam_ssa:var_name(). @@ -104,22 +103,28 @@ functions([], _Ps) -> []. -record(st, {ssa :: beam_ssa:block_map(), args :: [b_var()], cnt :: beam_ssa:label(), + use_bsm3 :: boolean(), frames=[] :: [beam_ssa:label()], intervals=[] :: [{b_var(),[range()]}], - aliases=[] :: [{b_var(),b_var()}], res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()}, regs=#{} :: #{b_var():=ssa_register()}, extra_annos=[] :: [{atom(),term()}] }). -define(PASS(N), {N,fun N/1}). -passes(ExtraAnnos) -> +passes(Opts) -> + AddPrecgAnnos = proplists:get_bool(dprecg, Opts), + FixTuples = proplists:get_bool(no_put_tuple2, Opts), Ps = [?PASS(assert_no_critical_edges), %% Preliminaries. ?PASS(fix_bs), ?PASS(sanitize), - ?PASS(fix_tuples), + case FixTuples of + false -> ignore; + true -> ?PASS(fix_tuples) + end, + ?PASS(use_set_tuple_element), ?PASS(place_frames), ?PASS(fix_receives), @@ -127,6 +132,10 @@ passes(ExtraAnnos) -> ?PASS(find_yregs), ?PASS(reserve_yregs), + %% Handle legacy binary match instruction that don't + %% accept a Y register as destination. + ?PASS(legacy_bs), + %% Improve reuse of Y registers to potentially %% reduce the size of the stack frame. ?PASS(copy_retval), @@ -135,30 +144,25 @@ passes(ExtraAnnos) -> %% Calculate live intervals. ?PASS(number_instructions), ?PASS(live_intervals), - ?PASS(remove_unsuitable_aliases), ?PASS(reserve_regs), - ?PASS(merge_intervals), %% If needed for a .precg file, save the live intervals %% so they can be included in an annotation. - case ExtraAnnos of + case AddPrecgAnnos of false -> ignore; true -> ?PASS(save_live_intervals) end, %% Allocate registers. ?PASS(linear_scan), - ?PASS(fix_aliased_regs), ?PASS(frame_size), ?PASS(turn_yregs)], - case ExtraAnnos of - false -> [P || P <- Ps, P =/= ignore]; - true -> Ps - end. + [P || P <- Ps, P =/= ignore]. -function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0, Ps) -> +function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0, + Ps, UseBSM3) -> try - St0 = #st{ssa=Blocks0,args=Args,cnt=Count0}, + St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,cnt=Count0}, St = compile:run_sub_passes(Ps, St0), #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St, F1 = add_extra_annos(F0, ExtraAnnos), @@ -174,14 +178,6 @@ function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0, Ps) -> save_live_intervals(#st{intervals=Intervals}=St) -> St#st{extra_annos=[{live_intervals,Intervals}]}. -fix_aliased_regs(#st{aliases=Aliases,regs=Regs}=St) -> - St#st{regs=fix_aliased_regs(Aliases, Regs)}. - -fix_aliased_regs([{Alias,V}|Aliases], Regs) -> - #{V:=Reg} = Regs, - fix_aliased_regs(Aliases, Regs#{Alias=>Reg}); -fix_aliased_regs([], Regs) -> Regs. - %% Add extra annotations when a .precg listing file is being produced. add_extra_annos(F, Annos) -> foldl(fun({Name,Value}, Acc) -> @@ -214,7 +210,7 @@ assert_no_ces(_, _, Blocks) -> Blocks. %% * Combine bs_match and bs_extract instructions to bs_get %% instructions. -fix_bs(#st{ssa=Blocks,cnt=Count0}=St) -> +fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) -> F = fun(#b_set{op=bs_start_match,dst=Dst}, A) -> %% Mark the root of the match context list. [{Dst,{context,Dst}}|A]; @@ -232,8 +228,13 @@ fix_bs(#st{ssa=Blocks,cnt=Count0}=St) -> CtxChain = maps:from_list(M), Linear0 = beam_ssa:linearize(Blocks), - %% Insert bs_save / bs_restore instructions where needed. - {Linear1,Count} = bs_save_restore(Linear0, CtxChain, Count0), + %% Insert position instructions where needed. + {Linear1,Count} = case UseBSM3 of + true -> + bs_pos_bsm3(Linear0, CtxChain, Count0); + false -> + bs_pos_bsm2(Linear0, CtxChain, Count0) + end, %% Rename instructions. Linear = bs_instrs(Linear1, CtxChain, []), @@ -241,10 +242,54 @@ fix_bs(#st{ssa=Blocks,cnt=Count0}=St) -> St#st{ssa=maps:from_list(Linear),cnt=Count} end. +%% Insert bs_get_position and bs_set_position instructions as needed. +bs_pos_bsm3(Linear0, CtxChain, Count0) -> + Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}), + Rs = maps:values(Rs0), + S0 = sofs:relation(Rs, [{context,save_point}]), + S1 = sofs:relation_to_family(S0), + S = sofs:to_external(S1), + + {SavePoints,Count1} = make_bs_pos_dict(S, Count0, []), + {Gets,Count2} = make_bs_setpos_map(Rs, SavePoints, Count1, []), + {Sets,Count} = make_bs_getpos_map(maps:to_list(Rs0), SavePoints, Count2, []), + + %% Now insert all saves and restores. + {bs_insert_bsm3(Linear0, Gets, Sets, SavePoints),Count}. + +make_bs_setpos_map([{Ctx,Save}=Ps|T], SavePoints, Count, Acc) -> + SavePoint = get_savepoint(Ps, SavePoints), + I = #b_set{op=bs_get_position,dst=SavePoint,args=[Ctx]}, + make_bs_setpos_map(T, SavePoints, Count+1, [{Save,I}|Acc]); +make_bs_setpos_map([], _, Count, Acc) -> + {maps:from_list(Acc),Count}. + +make_bs_getpos_map([{Bef,{Ctx,_}=Ps}|T], SavePoints, Count, Acc) -> + Ignored = #b_var{name={'@ssa_ignored',Count}}, + Args = [Ctx, get_savepoint(Ps, SavePoints)], + I = #b_set{op=bs_set_position,dst=Ignored,args=Args}, + make_bs_getpos_map(T, SavePoints, Count+1, [{Bef,I}|Acc]); +make_bs_getpos_map([], _, Count, Acc) -> + {maps:from_list(Acc),Count}. + +get_savepoint({_,_}=Ps, SavePoints) -> + Name = {'@ssa_bs_position', map_get(Ps, SavePoints)}, + #b_var{name=Name}. -%% Insert bs_save and bs_restore instructions as needed. +make_bs_pos_dict([{Ctx,Pts}|T], Count0, Acc0) -> + {Acc, Count} = make_bs_pos_dict_1(Pts, Ctx, Count0, Acc0), + make_bs_pos_dict(T, Count, Acc); +make_bs_pos_dict([], Count, Acc) -> + {maps:from_list(Acc), Count}. -bs_save_restore(Linear0, CtxChain, Count0) -> +make_bs_pos_dict_1([H|T], Ctx, I, Acc) -> + make_bs_pos_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]); +make_bs_pos_dict_1([], Ctx, I, Acc) -> + {[{Ctx,I}|Acc], I}. + +%% As bs_position but without OTP-22 instructions. This is only used when +%% cross-compiling to older versions. +bs_pos_bsm2(Linear0, CtxChain, Count0) -> Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}), Rs = maps:values(Rs0), S0 = sofs:relation(Rs, [{context,save_point}]), @@ -255,7 +300,7 @@ bs_save_restore(Linear0, CtxChain, Count0) -> {Restores,Count} = make_restore_map(maps:to_list(Rs0), Slots, Count1, []), %% Now insert all saves and restores. - {bs_insert(Linear0, Saves, Restores, Slots),Count}. + {bs_insert_bsm2(Linear0, Saves, Restores, Slots),Count}. make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) -> Ignored = #b_var{name={'@ssa_ignored',Count}}, @@ -279,7 +324,7 @@ make_restore_map([], _, Count, Acc) -> make_slot({Same,Same}, _Slots) -> #b_literal{val=start}; make_slot({_,_}=Ps, Slots) -> - #b_literal{val=maps:get(Ps, Slots)}. + #b_literal{val=map_get(Ps, Slots)}. make_save_point_dict([{Ctx,Pts}|T], Acc0) -> Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0), @@ -308,8 +353,7 @@ bs_restores([], _, _, Rs) -> Rs. bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) -> join_positions([{Succ,SPos},{Fail,FPos}], D); -bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, FPos, D) -> - SPos = FPos, %Assertion. +bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, _FPos, D) -> Update = [{L,SPos} || {_,L} <- List] ++ [{Fail,SPos}], join_positions(Update, D); bs_update_successors(#b_ret{}, _, _, D) -> D. @@ -388,10 +432,19 @@ bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is], Start = bs_subst_ctx(FromPos, CtxChain), #{Start:=FromPos} = PosMap, %Assertion. bs_restores_is(Is, CtxChain, PosMap, Rs); +bs_restores_is([#b_set{op=call,dst=Dst,args=Args}|Is], + CtxChain, PosMap0, Rs0) -> + {Rs,PosMap1} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0), + PosMap = bs_invalidate_pos(Args, PosMap1, CtxChain), + bs_restores_is(Is, CtxChain, PosMap, Rs); +bs_restores_is([#b_set{op=landingpad}|Is], CtxChain, PosMap0, Rs) -> + %% We can land here from any point, so all positions are invalid. + PosMap = maps:map(fun(_Start,_Pos) -> unknown end, PosMap0), + bs_restores_is(Is, CtxChain, PosMap, Rs); bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is], CtxChain, PosMap0, Rs0) when Op =:= bs_test_tail; - Op =:= call -> + Op =:= bs_get_tail -> {Rs,PosMap} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0), bs_restores_is(Is, CtxChain, PosMap, Rs); bs_restores_is([_|Is], CtxChain, PosMap, Rs) -> @@ -399,7 +452,6 @@ bs_restores_is([_|Is], CtxChain, PosMap, Rs) -> bs_restores_is([], _CtxChain, PosMap, Rs) -> {PosMap,Rs}. - bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx, #b_literal{val=binary},_Flags, #b_literal{val=all},#b_literal{val=U}]}) -> @@ -410,6 +462,23 @@ bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx, bs_match_type(_) -> plain. +%% Call instructions leave the match position in an undefined state, +%% requiring us to invalidate each affected argument. +bs_invalidate_pos([#b_var{}=Arg|Args], PosMap0, CtxChain) -> + Start = bs_subst_ctx(Arg, CtxChain), + case PosMap0 of + #{Start:=_} -> + PosMap = PosMap0#{Start:=unknown}, + bs_invalidate_pos(Args, PosMap, CtxChain); + #{} -> + %% Not a match context. + bs_invalidate_pos(Args, PosMap0, CtxChain) + end; +bs_invalidate_pos([_|Args], PosMap, CtxChain) -> + bs_invalidate_pos(Args, PosMap, CtxChain); +bs_invalidate_pos([], PosMap, _CtxChain) -> + PosMap. + bs_restore_args([#b_var{}=Arg|Args], PosMap0, CtxChain, Dst, Rs0) -> Start = bs_subst_ctx(Arg, CtxChain), case PosMap0 of @@ -432,33 +501,45 @@ bs_restore_args([], PosMap, _CtxChain, _Dst, Rs) -> %% Insert all bs_save and bs_restore instructions. -bs_insert([{L,#b_blk{is=Is0}=Blk}|Bs0], Saves, Restores, Slots) -> - Is = bs_insert_is_1(Is0, Restores, Slots), +bs_insert_bsm3(Blocks, Saves, Restores, SavePoints) -> + bs_insert_1(Blocks, Saves, Restores, SavePoints, fun(I) -> I end). + +bs_insert_bsm2(Blocks, Saves, Restores, SavePoints) -> + %% The old instructions require bs_start_match to be annotated with the + %% number of position slots it needs. + bs_insert_1(Blocks, Saves, Restores, SavePoints, + fun(#b_set{op=bs_start_match,dst=Dst}=I0) -> + NumSlots = case SavePoints of + #{Dst:=NumSlots0} -> NumSlots0; + #{} -> 0 + end, + beam_ssa:add_anno(num_slots, NumSlots, I0); + (I) -> + I + end). + +bs_insert_1([{L,#b_blk{is=Is0}=Blk}|Bs0], Saves, Restores, Slots, XFrm) -> + Is = bs_insert_is_1(Is0, Restores, Slots, XFrm), Bs = bs_insert_saves(Is, Bs0, Saves), - [{L,Blk#b_blk{is=Is}}|bs_insert(Bs, Saves, Restores, Slots)]; -bs_insert([], _, _, _) -> []. + [{L,Blk#b_blk{is=Is}}|bs_insert_1(Bs, Saves, Restores, Slots, XFrm)]; +bs_insert_1([], _, _, _, _) -> []. -bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is], Restores, Slots) -> +bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is], Restores, SavePoints, XFrm) -> + I = XFrm(I0), if Op =:= bs_test_tail; + Op =:= bs_get_tail; Op =:= bs_match; Op =:= call -> Rs = case Restores of #{Dst:=R} -> [R]; #{} -> [] end, - Rs ++ [I0|bs_insert_is_1(Is, Restores, Slots)]; - Op =:= bs_start_match -> - NumSlots = case Slots of - #{Dst:=NumSlots0} -> NumSlots0; - #{} -> 0 - end, - I = beam_ssa:add_anno(num_slots, NumSlots, I0), - [I|bs_insert_is_1(Is, Restores, Slots)]; + Rs ++ [I|bs_insert_is_1(Is, Restores, SavePoints, XFrm)]; true -> - [I0|bs_insert_is_1(Is, Restores, Slots)] + [I|bs_insert_is_1(Is, Restores, SavePoints, XFrm)] end; -bs_insert_is_1([], _, _) -> []. +bs_insert_is_1([], _, _, _) -> []. bs_insert_saves([#b_set{dst=Dst}|Is], Bs, Saves) -> case Saves of @@ -504,6 +585,8 @@ bs_instrs_is([#b_set{op=Op,args=Args0}=I0|Is], CtxChain, Acc) -> I1#b_set{op=bs_skip,args=[Type,Ctx|As]}; {bs_match,[#b_literal{val=string},Ctx|As]} -> I1#b_set{op=bs_match_string,args=[Ctx|As]}; + {bs_get_tail,[Ctx|As]} -> + I1#b_set{op=bs_get_tail,args=[Ctx|As]}; {_,_} -> I1 end, @@ -534,6 +617,59 @@ bs_subst_ctx(#b_var{}=Var, CtxChain) -> bs_subst_ctx(Other, _CtxChain) -> Other. +%% legacy_bs(St0) -> St. +%% Binary matching instructions in OTP 21 and earlier don't support +%% a Y register as destination. If St#st.use_bsm3 is false, +%% we will need to rewrite those instructions so that the result +%% is first put in an X register and then moved to a Y register +%% if the operation succeeded. + +legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) -> + IsYreg = maps:from_list([{V,true} || {V,{y,_}} <- Res]), + Linear0 = beam_ssa:linearize(Blocks0), + {Linear,Count} = legacy_bs(Linear0, IsYreg, Count0, #{}, []), + Blocks = maps:from_list(Linear), + St#st{ssa=Blocks,cnt=Count}; +legacy_bs(#st{use_bsm3=true}=St) -> St. + +legacy_bs([{L,Blk}|Bs], IsYreg, Count0, Copies0, Acc) -> + #b_blk{is=Is0,last=Last} = Blk, + Is1 = case Copies0 of + #{L:=Copy} -> [Copy|Is0]; + #{} -> Is0 + end, + {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []), + legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]); +legacy_bs([], _IsYreg, Count, _Copies, Acc) -> + {Acc,Count}. + +legacy_bs_is([#b_set{op=Op,dst=Dst}=I0, + #b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0], + Last, IsYreg, Count0, Copies0, Acc) -> + NeedsFix = is_map_key(Dst, IsYreg) andalso + case Op of + bs_get -> true; + bs_init -> true; + _ -> false + end, + case NeedsFix of + true -> + TempDst = #b_var{name={'@bs_temp_dst',Count0}}, + Count = Count0 + 1, + I = I0#b_set{dst=TempDst}, + SuccI = SuccI0#b_set{args=[TempDst]}, + Copy = #b_set{op=copy,dst=Dst,args=[TempDst]}, + #b_br{bool=SuccDst,succ=SuccL} = Last, + Copies = Copies0#{SuccL=>Copy}, + legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]); + false -> + legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc]) + end; +legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) -> + legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]); +legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) -> + {reverse(Acc),Count,Copies}. + %% sanitize(St0) -> St. %% Remove constructs that can cause problems later: %% @@ -549,7 +685,7 @@ sanitize(#st{ssa=Blocks0,cnt=Count0}=St) -> St#st{ssa=Blocks,cnt=Count}. sanitize([L|Ls], Count0, Blocks0, Values0) -> - #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks0), + #b_blk{is=Is0} = Blk0 = map_get(L, Blocks0), case sanitize_is(Is0, Count0, Values0, false, []) of no_change -> sanitize(Ls, Count0, Blocks0, Values0); @@ -574,23 +710,24 @@ sanitize([], Count, Blocks0, Values) -> false -> remove_unreachable(Ls, Blocks, Reachable, []) end,Count}. -sanitize_is([#b_set{op=get_map_element, - args=[#b_literal{}=Map,Key]}=I0|Is], - Count0, Values, _Changed, Acc) -> - {MapVarName,Count} = new_var_name('@ssa_map', Count0), - MapVar = #b_var{name=MapVarName}, - I = I0#b_set{args=[MapVar,Key]}, - Copy = #b_set{op=copy,dst=MapVar,args=[Map]}, - sanitize_is(Is, Count, Values, true, [I,Copy|Acc]); -sanitize_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args0}=I0|Is0], - Count, Values, Changed, Acc) -> - Args = map(fun(#b_var{name=V}=Var) -> - case Values of - #{V:=New} -> New; - #{} -> Var - end; - (Lit) -> Lit - end, Args0), +sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is], + Count0, Values, Changed, Acc) -> + case sanitize_args(Args0, Values) of + [#b_literal{}=Map,Key] -> + %% Bind the literal map to a variable. + {MapVar,Count} = new_var('@ssa_map', Count0), + I = I0#b_set{args=[MapVar,Key]}, + Copy = #b_set{op=copy,dst=MapVar,args=[Map]}, + sanitize_is(Is, Count, Values, true, [I,Copy|Acc]); + [_,_]=Args0 -> + sanitize_is(Is, Count0, Values, Changed, [I0|Acc]); + [_,_]=Args -> + I = I0#b_set{args=Args}, + sanitize_is(Is, Count0, Values, Changed, [I|Acc]) + end; +sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0], + Count, Values, Changed0, Acc) -> + Args = sanitize_args(Args0, Values), case sanitize_instr(Op, Args, I0) of {value,Value0} -> Value = #b_literal{val=Value0}, @@ -598,7 +735,9 @@ sanitize_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args0}=I0|Is0], {ok,I} -> sanitize_is(Is0, Count, Values, true, [I|Acc]); ok -> - sanitize_is(Is0, Count, Values, Changed, [I0|Acc]) + I = I0#b_set{args=Args}, + Changed = Changed0 orelse Args =/= Args0, + sanitize_is(Is0, Count, Values, Changed, [I|Acc]) end; sanitize_is([], Count, Values, Changed, Acc) -> case Changed of @@ -608,6 +747,14 @@ sanitize_is([], Count, Values, Changed, Acc) -> no_change end. +sanitize_args(Args, Values) -> + map(fun(Var) -> + case Values of + #{Var:=New} -> New; + #{} -> Var + end + end, Args). + sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) -> case erl_bifs:is_pure(erlang, Bif, 1) of false -> @@ -671,7 +818,7 @@ sanitize_badarg(I) -> I#b_set{op=call,args=[Func,#b_literal{val=badarg}]}. remove_unreachable([L|Ls], Blocks, Reachable, Acc) -> - #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks), + #b_blk{is=Is0} = Blk0 = map_get(L, Blocks), case split_phis(Is0) of {[_|_]=Phis,Rest} -> Is = [prune_phi(Phi, Reachable) || Phi <- Phis] ++ Rest, @@ -693,15 +840,16 @@ prune_phi(#b_set{args=Args0}=Phi, Reachable) -> %%% %% fix_tuples(St0) -> St. -%% We must split tuple creation into two instruction to mirror the -%% the way tuples are created in BEAM. Each put_tuple instruction is -%% split into put_tuple_arity followed by put_tuple_elements. +%% If compatibility with a previous version of Erlang has been +%% requested, tuple creation must be split into two instruction to +%% mirror the the way tuples are created in BEAM prior to OTP 22. +%% Each put_tuple instruction is split into put_tuple_arity followed +%% by put_tuple_elements. fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) -> F = fun (#b_set{op=put_tuple,args=Args}=Put, C0) -> Arity = #b_literal{val=length(Args)}, - {VarName,C} = new_var_name('@ssa_ignore', C0), - Ignore = #b_var{name=VarName}, + {Ignore,C} = new_var('@ssa_ignore', C0), {[Put#b_set{op=put_tuple_arity,args=[Arity]}, #b_set{dst=Ignore,op=put_tuple_elements,args=Args}],C}; (I, C) -> {[I],C} @@ -710,6 +858,202 @@ fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) -> St#st{ssa=Blocks,cnt=Count}. %%% +%%% Introduce the set_tuple_element instructions to make +%%% multiple-field record updates faster. +%%% +%%% The expansion of record field updates, when more than one field is +%%% updated, but not a majority of the fields, will create a sequence of +%%% calls to `erlang:setelement(Index, Value, Tuple)` where Tuple in the +%%% first call is the original record tuple, and in the subsequent calls +%%% Tuple is the result of the previous call. Furthermore, all Index +%%% values are constant positive integers, and the first call to +%%% `setelement` will have the greatest index. Thus all the following +%%% calls do not actually need to test at run-time whether Tuple has type +%%% tuple, nor that the index is within the tuple bounds. +%%% +%%% Since this optimization introduces destructive updates, it used to +%%% be done as the very last Core Erlang pass before going to +%%% lower-level code. However, it turns out that this kind of destructive +%%% updates are awkward also in SSA code and can prevent or complicate +%%% type analysis and aggressive optimizations. +%%% +%%% NOTE: Because there no write barriers in the system, this kind of +%%% optimization can only be done when we are sure that garbage +%%% collection will not be triggered between the creation of the tuple +%%% and the destructive updates - otherwise we might insert pointers +%%% from an older generation to a newer. +%%% + +use_set_tuple_element(#st{ssa=Blocks0}=St) -> + Uses = count_uses(Blocks0), + RPO = reverse(beam_ssa:rpo(Blocks0)), + Blocks = use_ste_1(RPO, Uses, Blocks0), + St#st{ssa=Blocks}. + +use_ste_1([L|Ls], Uses, Blocks0) -> + {Blk0,Blocks} = use_ste_across(L, Uses, Blocks0), + #b_blk{is=Is0} = Blk0, + case use_ste_is(Is0, Uses) of + Is0 -> + use_ste_1(Ls, Uses, Blocks); + Is -> + Blk = Blk0#b_blk{is=Is}, + use_ste_1(Ls, Uses, Blocks#{L:=Blk}) + end; +use_ste_1([], _, Blocks) -> Blocks. + +%%% Optimize within a single block. + +use_ste_is([#b_set{}=I|Is0], Uses) -> + Is = use_ste_is(Is0, Uses), + case extract_ste(I) of + none -> + [I|Is]; + Extracted -> + use_ste_call(Extracted, I, Is, Uses) + end; +use_ste_is([], _Uses) -> []. + +use_ste_call({Dst0,Pos0,_Var0,_Val0}, Call1, Is0, Uses) -> + case get_ste_call(Is0, []) of + {Prefix,{Dst1,Pos1,Dst0,Val1},Call2,Is} + when Pos1 > 0, Pos0 > Pos1 -> + case is_single_use(Dst0, Uses) of + true -> + Call = Call1#b_set{dst=Dst1}, + Args = [Val1,Dst1,#b_literal{val=Pos1-1}], + Dsetel = Call2#b_set{op=set_tuple_element, + dst=Dst0, + args=Args}, + [Call|Prefix] ++ [Dsetel|Is]; + false -> + [Call1|Is0] + end; + _ -> + [Call1|Is0] + end. + +get_ste_call([#b_set{op=get_tuple_element}=I|Is], Acc) -> + get_ste_call(Is, [I|Acc]); +get_ste_call([#b_set{op=call}=I|Is], Acc) -> + case extract_ste(I) of + none -> + none; + Extracted -> + {reverse(Acc),Extracted,I,Is} + end; +get_ste_call(_, _) -> none. + +extract_ste(#b_set{op=call,dst=Dst, + args=[#b_remote{mod=#b_literal{val=M}, + name=#b_literal{val=F}}|Args]}) -> + case {M,F,Args} of + {erlang,setelement,[#b_literal{val=Pos},Tuple,Val]} -> + {Dst,Pos,Tuple,Val}; + {_,_,_} -> + none + end; +extract_ste(#b_set{}) -> none. + +%%% Optimize accross blocks within a try/catch block. + +use_ste_across(L, Uses, Blocks) -> + case map_get(L, Blocks) of + #b_blk{last=#b_br{bool=#b_var{}}}=Blk -> + try + use_ste_across_1(L, Blk, Uses, Blocks) + catch + throw:not_possible -> + {Blk,Blocks} + end; + #b_blk{}=Blk -> + {Blk,Blocks} + end. + +use_ste_across_1(L, Blk0, Uses, Blocks0) -> + #b_blk{is=IsThis,last=#b_br{bool=Bool,succ=Next}} = Blk0, + case reverse(IsThis) of + [#b_set{op=succeeded,dst=Bool,args=[Result]}=Succ0, + #b_set{op=call,args=[#b_remote{}|_],dst=Result}=Call1|Prefix] -> + case is_single_use(Bool, Uses) andalso + is_n_uses(2, Result, Uses) of + true -> ok; + false -> throw(not_possible) + end, + Call2 = use_ste_across_next(Next, Uses, Blocks0), + Is = [Call1,Call2], + case use_ste_is(Is, decrement_uses(Result, Uses)) of + [#b_set{}=Call,#b_set{op=set_tuple_element}=Ste] -> + Blocks1 = use_ste_fix_next(Ste, Next, Blocks0), + Succ = Succ0#b_set{args=[Call#b_set.dst]}, + Blk = Blk0#b_blk{is=reverse(Prefix, [Call,Succ])}, + Blocks = Blocks1#{L:=Blk}, + {Blk,Blocks}; + _ -> + throw(not_possible) + end; + _ -> + throw(not_possible) + end. + +use_ste_across_next(Next, Uses, Blocks) -> + case map_get(Next, Blocks) of + #b_blk{is=[#b_set{op=call,dst=Result,args=[#b_remote{}|_]}=Call, + #b_set{op=succeeded,dst=Bool,args=[Result]}], + last=#b_br{bool=Bool}} -> + case is_single_use(Bool, Uses) andalso + is_n_uses(2, Result, Uses) of + true -> ok; + false -> throw(not_possible) + end, + Call; + #b_blk{} -> + throw(not_possible) + end. + +use_ste_fix_next(Ste, Next, Blocks) -> + Blk0 = map_get(Next, Blocks), + #b_blk{is=[#b_set{op=call},#b_set{op=succeeded}],last=Br0} = Blk0, + Br = beam_ssa:normalize(Br0#b_br{bool=#b_literal{val=true}}), + Blk = Blk0#b_blk{is=[Ste],last=Br}, + Blocks#{Next:=Blk}. + +%% Count how many times each variable is used. + +count_uses(Blocks) -> + count_uses_blk(maps:values(Blocks), #{}). + +count_uses_blk([#b_blk{is=Is,last=Last}|Bs], CountMap0) -> + F = fun(I, CountMap) -> + foldl(fun(Var, Acc) -> + case Acc of + #{Var:=3} -> Acc; + #{Var:=C} -> Acc#{Var:=C+1}; + #{} -> Acc#{Var=>1} + end + end, CountMap, beam_ssa:used(I)) + end, + CountMap = F(Last, foldl(F, CountMap0, Is)), + count_uses_blk(Bs, CountMap); +count_uses_blk([], CountMap) -> CountMap. + +decrement_uses(V, Uses) -> + #{V:=C} = Uses, + Uses#{V:=C-1}. + +is_n_uses(N, V, Uses) -> + case Uses of + #{V:=N} -> true; + #{} -> false + end. + +is_single_use(V, Uses) -> + case Uses of + #{V:=1} -> true; + #{} -> false + end. + +%%% %%% Find out where frames should be placed. %%% @@ -727,7 +1071,7 @@ fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) -> %% a stack frame or set up a stack frame with a different size. place_frames(#st{ssa=Blocks}=St) -> - Doms = beam_ssa:dominators(Blocks), + {Doms,_} = beam_ssa:dominators(Blocks), Ls = beam_ssa:rpo(Blocks), Tried = gb_sets:empty(), Frames0 = [], @@ -735,7 +1079,7 @@ place_frames(#st{ssa=Blocks}=St) -> St#st{frames=Frames}. place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) -> - Blk = maps:get(L, Blocks), + Blk = map_get(L, Blocks), case need_frame(Blk) of true -> %% This block needs a frame. Try to place it here. @@ -846,15 +1190,15 @@ place_frame_here(L, Blocks, Doms, Frames) -> %% Return all predecessors referenced in phi nodes. phi_predecessors(L, Blocks) -> - #b_blk{is=Is} = maps:get(L, Blocks), + #b_blk{is=Is} = map_get(L, Blocks), [P || #b_set{op=phi,args=Args} <- Is, {_,P} <- Args]. %% is_dominated_by(Label, DominatedBy, Dominators) -> true|false. %% Test whether block Label is dominated by block DominatedBy. is_dominated_by(L, DomBy, Doms) -> - DominatedBy = maps:get(L, Doms), - ordsets:is_element(DomBy, DominatedBy). + DominatedBy = map_get(L, Doms), + member(DomBy, DominatedBy). %% need_frame(#b_blk{}) -> true|false. %% Test whether any of the instructions in the block requires a stack frame. @@ -864,12 +1208,12 @@ need_frame(#b_blk{is=Is,last=#b_ret{arg=Ret}}) -> need_frame(#b_blk{is=Is}) -> need_frame_1(Is, body). -need_frame_1([#b_set{op=make_fun,dst=#b_var{name=Fun}}|Is], {return,_}=Context) -> +need_frame_1([#b_set{op=make_fun,dst=Fun}|Is], {return,_}=Context) -> %% Since make_fun clobbers X registers, a stack frame is needed if %% any of the following instructions use any other variable than %% the one holding the reference to the created fun. need_frame_1(Is, Context) orelse - case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=#b_var{name=Fun}}}) of + case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=Fun}}) of [Fun] -> false; [_|_] -> true end; @@ -884,7 +1228,7 @@ need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) -> case Func of #b_remote{mod=#b_literal{val=Mod}, name=#b_literal{val=Name}, - arity=Arity} -> + arity=Arity} when is_atom(Mod), is_atom(Name) -> case erl_bifs:is_exit_bif(Mod, Name, Arity) of true -> false; @@ -896,11 +1240,11 @@ need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) -> #b_remote{} -> %% This is an apply(), which always needs a frame. true; - #b_var{} -> - %% A fun call always needs a frame. - true; + #b_local{} -> + Context =:= body orelse Is =/= []; _ -> - Context =:= body orelse Is =/= [] + %% A fun call always needs a frame. + true end; need_frame_1([I|Is], Context) -> beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context); @@ -932,11 +1276,11 @@ is_trap_bif(_, _, _) -> false. %%% used during matching. %%% %%% Depending on where variables are defined and used, they must -%%% be handling in two different ways. +%%% be handled in two different ways. %%% %%% Variables that are always defined in the receive (before branching %%% out into the different clauses of the receive) and used after the -%%% receive, must be handled in the following way: Before each +%%% receive must be handled in the following way: Before each %%% remove_message instruction, each such variable must be copied, and %%% all variables must be consolidated using a phi node in the %%% common exit block for the receive. @@ -984,15 +1328,13 @@ recv_common(Defs, Exit, Blocks) -> %% in the exit block following the receive. recv_fix_common([Msg0|T], Exit, Rm, Blocks0, Count0) -> - {Msg1,Count1} = new_var_name('@recv', Count0), - Msg = #b_var{name=Msg1}, + {Msg,Count1} = new_var('@recv', Count0), Blocks1 = beam_ssa:rename_vars(#{Msg0=>Msg}, [Exit], Blocks0), N = length(Rm), - {MsgVars0,Count} = new_var_names(duplicate(N, '@recv'), Count1), - MsgVars = [#b_var{name=V} || V <- MsgVars0], + {MsgVars,Count} = new_vars(duplicate(N, '@recv'), Count1), PhiArgs = fix_exit_phi_args(MsgVars, Rm, Exit, Blocks1), Phi = #b_set{op=phi,dst=Msg,args=PhiArgs}, - ExitBlk0 = maps:get(Exit, Blocks1), + ExitBlk0 = map_get(Exit, Blocks1), ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitBlk0#b_blk.is]}, Blocks2 = Blocks1#{Exit:=ExitBlk}, Blocks = recv_fix_common_1(MsgVars, Rm, Msg0, Blocks2), @@ -1003,8 +1345,8 @@ recv_fix_common([], _, _, Blocks, Count) -> recv_fix_common_1([V|Vs], [Rm|Rms], Msg, Blocks0) -> Ren = #{Msg=>V}, Blocks1 = beam_ssa:rename_vars(Ren, [Rm], Blocks0), - #b_blk{is=Is0} = Blk0 = maps:get(Rm, Blocks1), - Copy = #b_set{op=copy,dst=V,args=[#b_var{name=Msg}]}, + #b_blk{is=Is0} = Blk0 = map_get(Rm, Blocks1), + Copy = #b_set{op=copy,dst=V,args=[Msg]}, Is = insert_after_phis(Is0, [Copy]), Blk = Blk0#b_blk{is=Is}, Blocks = Blocks1#{Rm:=Blk}, @@ -1013,14 +1355,19 @@ recv_fix_common_1([], [], _Msg, Blocks) -> Blocks. fix_exit_phi_args([V|Vs], [Rm|Rms], Exit, Blocks) -> Path = beam_ssa:rpo([Rm], Blocks), - Pred = exit_predecessor(Path, Exit), - [{V,Pred}|fix_exit_phi_args(Vs, Rms, Exit, Blocks)]; + Preds = exit_predecessors(Path, Exit, Blocks), + [{V,Pred} || Pred <- Preds] ++ fix_exit_phi_args(Vs, Rms, Exit, Blocks); fix_exit_phi_args([], [], _, _) -> []. -exit_predecessor([Pred,Exit|_], Exit) -> - Pred; -exit_predecessor([_|Bs], Exit) -> - exit_predecessor(Bs, Exit). +exit_predecessors([L|Ls], Exit, Blocks) -> + Blk = map_get(L, Blocks), + case member(Exit, beam_ssa:successors(Blk)) of + true -> + [L|exit_predecessors(Ls, Exit, Blocks)]; + false -> + exit_predecessors(Ls, Exit, Blocks) + end; +exit_predecessors([], _Exit, _Blocks) -> []. %% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}. %% Add a copy instruction for all variables that are matched out and @@ -1030,16 +1377,14 @@ fix_receive([L|Ls], Defs, Blocks0, Count0) -> {RmDefs,Used0} = beam_ssa:def_used([L], Blocks0), Def = ordsets:subtract(Defs, RmDefs), Used = ordsets:intersection(Def, Used0), - {NewVs,Count} = new_var_names(Used, Count0), - NewVars = [#b_var{name=V} || V <- NewVs], + {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Used], Count0), Ren = zip(Used, NewVars), Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0), - #b_blk{is=Is0} = Blk1 = maps:get(L, Blocks1), - CopyIs = [#b_set{op=copy,dst=New,args=[#b_var{name=Old}]} || - {Old,New} <- Ren], + #b_blk{is=Is0} = Blk1 = map_get(L, Blocks1), + CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren], Is = insert_after_phis(Is0, CopyIs), Blk = Blk1#b_blk{is=Is}, - Blocks = maps:put(L, Blk, Blocks1), + Blocks = Blocks1#{L:=Blk}, fix_receive(Ls, Defs, Blocks, Count); fix_receive([], _Defs, Blocks, Count) -> {Blocks,Count}. @@ -1064,7 +1409,7 @@ find_loop_exit_1(_, _, Exit) -> Exit. find_rm_blocks(L, Blocks) -> Seen = gb_sets:singleton(L), - Blk = maps:get(L, Blocks), + Blk = map_get(L, Blocks), Succ = beam_ssa:successors(Blk), find_rm_blocks_1(Succ, Seen, Blocks). @@ -1074,7 +1419,7 @@ find_rm_blocks_1([L|Ls], Seen0, Blocks) -> find_rm_blocks_1(Ls, Seen0, Blocks); false -> Seen = gb_sets:insert(L, Seen0), - Blk = maps:get(L, Blocks), + Blk = map_get(L, Blocks), case find_rm_act(Blk#b_blk.is) of prune -> %% Looping back. Don't look at any successors. @@ -1126,7 +1471,7 @@ find_rm_act([]) -> find_yregs(#st{frames=[]}=St) -> St; find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) -> - FrameDefs = find_defs(Frames, Blocks0, [V || #b_var{name=V} <- Args]), + FrameDefs = find_defs(Frames, Blocks0, [V || #b_var{}=V <- Args]), Blocks = find_yregs_1(FrameDefs, Blocks0), St#st{ssa=Blocks}. @@ -1136,16 +1481,16 @@ find_yregs_1([{F,Defs}|Fs], Blocks0) -> Ls = beam_ssa:rpo([F], Blocks0), Yregs0 = [], Yregs = find_yregs_2(Ls, Blocks0, D0, Yregs0), - Blk0 = maps:get(F, Blocks0), + Blk0 = map_get(F, Blocks0), Blk = beam_ssa:add_anno(yregs, Yregs, Blk0), Blocks = Blocks0#{F:=Blk}, find_yregs_1(Fs, Blocks); find_yregs_1([], Blocks) -> Blocks. find_yregs_2([L|Ls], Blocks0, D0, Yregs0) -> - Blk0 = maps:get(L, Blocks0), + Blk0 = map_get(L, Blocks0), #b_blk{is=Is,last=Last} = Blk0, - Ys0 = maps:get(L, D0), + Ys0 = map_get(L, D0), {Yregs1,Ys} = find_yregs_is(Is, Ys0, Yregs0), Yregs = find_yregs_terminator(Last, Ys, Yregs1), Successors = beam_ssa:successors(Blk0), @@ -1172,7 +1517,7 @@ find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) -> false -> Seen1 = gb_sets:insert(L, Seen0), {Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0), - #b_blk{is=Is} = Blk = maps:get(L, Blocks), + #b_blk{is=Is} = Blk = map_get(L, Blocks), Defs = find_defs_is(Is, Defs0), Successors = beam_ssa:successors(Blk), find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc) @@ -1181,7 +1526,7 @@ find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) -> find_defs_1([], _, _, Seen, _, Acc) -> {Acc,Seen}. -find_defs_is([#b_set{dst=#b_var{name=Dst}}|Is], Acc) -> +find_defs_is([#b_set{dst=Dst}|Is], Acc) -> find_defs_is(Is, [Dst|Acc]); find_defs_is([], Acc) -> Acc. @@ -1191,15 +1536,15 @@ find_update_succ([S|Ss], #dk{d=Defs0,k=Killed0}=DK0, D0) -> Defs = ordsets:intersection(Defs0, Defs1), Killed = ordsets:union(Killed0, Killed1), DK = #dk{d=Defs,k=Killed}, - D = maps:put(S, DK, D0), + D = D0#{S:=DK}, find_update_succ(Ss, DK0, D); #{} -> - D = maps:put(S, DK0, D0), + D = D0#{S=>DK0}, find_update_succ(Ss, DK0, D) end; find_update_succ([], _, D) -> D. -find_yregs_is([#b_set{dst=#b_var{name=Dst}}=I|Is], #dk{d=Defs0,k=Killed0}=Ys, Yregs0) -> +find_yregs_is([#b_set{dst=Dst}=I|Is], #dk{d=Defs0,k=Killed0}=Ys, Yregs0) -> Used = beam_ssa:used(I), Yregs1 = ordsets:intersection(Used, Killed0), Yregs = ordsets:union(Yregs0, Yregs1), @@ -1284,7 +1629,7 @@ copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Count0}=St) -> St#st{ssa=Blocks,cnt=Count}. copy_retval_1([F|Fs], Blocks0, Count0) -> - #b_blk{anno=#{yregs:=Yregs0},is=Is} = maps:get(F, Blocks0), + #b_blk{anno=#{yregs:=Yregs0},is=Is} = map_get(F, Blocks0), Yregs1 = gb_sets:from_list(Yregs0), Yregs = collect_yregs(Is, Yregs1), Ls = beam_ssa:rpo([F], Blocks0), @@ -1293,7 +1638,7 @@ copy_retval_1([F|Fs], Blocks0, Count0) -> copy_retval_1([], Blocks, Count) -> {Blocks,Count}. -collect_yregs([#b_set{op=copy,dst=#b_var{name=Y},args=[#b_var{name=X}]}|Is], +collect_yregs([#b_set{op=copy,dst=Y,args=[#b_var{}=X]}|Is], Yregs0) -> true = gb_sets:is_member(X, Yregs0), %Assertion. Yregs = gb_sets:insert(Y, gb_sets:delete(X, Yregs0)), @@ -1303,7 +1648,7 @@ collect_yregs([#b_set{}|Is], Yregs) -> collect_yregs([], Yregs) -> Yregs. copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) -> - #b_blk{is=Is0,last=Last} = Blk = maps:get(L, Blocks0), + #b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0), RC = case {Last,Ls} of {#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} -> true; @@ -1330,17 +1675,20 @@ copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs, Copy, Count, Acc) -> I = I0#b_set{args=copy_sub_args(Args0, Copy)}, {reverse(Acc, [I|acc_copy([], Copy)]),Count}; +copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0) + when Op =:= call; Op =:= make_fun -> + {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0), + {reverse(Acc, [I]),Count}; copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) -> {reverse(Acc, acc_copy(Is, Copy)),Count}; copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) -> {reverse(Acc, acc_copy(Is, Copy)),Count}; -copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetVal}=Dst}=I0|Is], RC, Yregs, +copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetName}=Dst}=I0|Is], RC, Yregs, Copy0, Count0, Acc0) when Op =:= call; Op =:= make_fun -> {I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0), - case gb_sets:is_member(RetVal, Yregs) of + case gb_sets:is_member(Dst, Yregs) of true -> - {NewVarName,Count} = new_var_name(RetVal, Count1), - NewVar = #b_var{name=NewVarName}, + {NewVar,Count} = new_var(RetName, Count1), Copy = #b_set{op=copy,dst=Dst,args=[NewVar]}, I = I1#b_set{dst=NewVar}, copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]); @@ -1390,16 +1738,15 @@ copy_retval_is([], RC, _, Copy, Count, Acc) -> place_retval_copy(I, _Yregs, none, Count, Acc) -> {I,Count,Acc}; place_retval_copy(#b_set{args=[F|Args0]}=I, Yregs, Copy, Count0, Acc0) -> - #b_set{dst=#b_var{name=Avoid}} = Copy, + #b_set{dst=Avoid} = Copy, {Args,Acc1,Count} = copy_func_args(Args0, Yregs, Avoid, Acc0, [], Count0), Acc = [Copy|Acc1], {I#b_set{args=[F|Args]},Count,Acc}. -copy_func_args([#b_var{name=V}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) -> - case gb_sets:is_member(V, Yregs) of - true when V =/= Avoid -> - {NewVarName,Count} = new_var_name(V, Count0), - NewVar = #b_var{name=NewVarName}, +copy_func_args([#b_var{name=AName}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) -> + case gb_sets:is_member(A, Yregs) of + true when A =/= Avoid -> + {NewVar,Count} = new_var(AName, Count0), Copy = #b_set{op=copy,dst=NewVar,args=[A]}, copy_func_args(As, Yregs, Avoid, [Copy|CopyAcc], [NewVar|Acc], Count); _ -> @@ -1443,7 +1790,7 @@ opt_get_list(#st{ssa=Blocks,res=Res}=St) -> St#st{ssa=opt_get_list_1(Ls, ResMap, Blocks)}. opt_get_list_1([L|Ls], Res, Blocks0) -> - #b_blk{is=Is0} = Blk = maps:get(L, Blocks0), + #b_blk{is=Is0} = Blk = map_get(L, Blocks0), case opt_get_list_is(Is0, Res, [], false) of no -> opt_get_list_1(Ls, Res, Blocks0); @@ -1453,9 +1800,9 @@ opt_get_list_1([L|Ls], Res, Blocks0) -> end; opt_get_list_1([], _, Blocks) -> Blocks. -opt_get_list_is([#b_set{op=get_hd,dst=#b_var{name=Hd}, +opt_get_list_is([#b_set{op=get_hd,dst=Hd, args=[Cons]}=GetHd, - #b_set{op=get_tl,dst=#b_var{name=Tl}, + #b_set{op=get_tl,dst=Tl, args=[Cons]}=GetTl|Is], Res, Acc, Changed) -> %% Note that when this pass is run, only Y registers have @@ -1497,12 +1844,12 @@ number_instructions(#st{ssa=Blocks0}=St) -> St#st{ssa=number_is_1(Ls, 1, Blocks0)}. number_is_1([L|Ls], N0, Blocks0) -> - #b_blk{is=Is0,last=Last0} = Bl0 = maps:get(L, Blocks0), + #b_blk{is=Is0,last=Last0} = Bl0 = map_get(L, Blocks0), {Is,N1} = number_is_2(Is0, N0, []), Last = beam_ssa:add_anno(n, N1, Last0), N = N1 + 2, Bl = Bl0#b_blk{is=Is,last=Last}, - Blocks = maps:put(L, Bl, Blocks0), + Blocks = Blocks0#{L:=Bl}, number_is_1(Ls, N, Blocks); number_is_1([], _, Blocks) -> Blocks. @@ -1519,13 +1866,13 @@ number_is_2([], N, Acc) -> %%% live_intervals(#st{args=Args,ssa=Blocks}=St) -> - Vars0 = [{V,{0,1}} || #b_var{name=V} <- Args], + Vars0 = [{V,{0,1}} || #b_var{}=V <- Args], F = fun(L, _, A) -> live_interval_blk(L, Blocks, A) end, LiveMap0 = #{}, - Acc0 = {[],[],LiveMap0}, - {Vars,Aliases,_} = beam_ssa:fold_po(F, Acc0, Blocks), + Acc0 = {[],LiveMap0}, + {Vars,_} = beam_ssa:fold_po(F, Acc0, Blocks), Intervals = merge_ranges(rel2fam(Vars0++Vars)), - St#st{intervals=Intervals,aliases=Aliases}. + St#st{intervals=Intervals}. merge_ranges([{V,Rs}|T]) -> [{V,merge_ranges_1(Rs)}|merge_ranges(T)]; @@ -1537,20 +1884,19 @@ merge_ranges_1([R|Rs]) -> [R|merge_ranges_1(Rs)]; merge_ranges_1([]) -> []. -live_interval_blk(L, Blocks, {Vars0,Aliases0,LiveMap0}) -> +live_interval_blk(L, Blocks, {Vars0,LiveMap0}) -> Live0 = [], Successors = beam_ssa:successors(L, Blocks), Live1 = update_successors(Successors, L, Blocks, LiveMap0, Live0), %% Add ranges for all variables that are live in the successors. - #b_blk{is=Is,last=Last} = maps:get(L, Blocks), + #b_blk{is=Is,last=Last} = map_get(L, Blocks), End = beam_ssa:get_anno(n, Last), Use = [{V,{use,End+1}} || V <- Live1], %% Determine used and defined variables in this block. FirstNumber = first_number(Is, Last), - {UseDef0,Aliases} = live_interval_blk_1([Last|reverse(Is)], - FirstNumber, Aliases0, Use), + UseDef0 = live_interval_blk_1([Last|reverse(Is)], FirstNumber, Use), UseDef = rel2fam(UseDef0), %% Update what is live at the beginning of this block and @@ -1563,7 +1909,7 @@ live_interval_blk(L, Blocks, {Vars0,Aliases0,LiveMap0}) -> %% Construct the ranges for this block. Vars = make_block_ranges(UseDef, FirstNumber, Vars0), - {Vars,Aliases,LiveMap}. + {Vars,LiveMap}. make_block_ranges([{V,[{def,Def}]}|Vs], First, Acc) -> make_block_ranges(Vs, First, [{V,{Def,Def}}|Acc]); @@ -1575,37 +1921,29 @@ make_block_ranges([{V,[{use,_}|_]=Uses}|Vs], First, Acc) -> make_block_ranges(Vs, First, [{V,{First,Last}}|Acc]); make_block_ranges([], _, Acc) -> Acc. -live_interval_blk_1([#b_set{op=phi,dst=#b_var{name=Dst}}|Is], - FirstNumber, Aliases, Acc0) -> +live_interval_blk_1([#b_set{op=phi,dst=Dst}|Is], FirstNumber, Acc0) -> Acc = [{Dst,{def,FirstNumber}}|Acc0], - live_interval_blk_1(Is, FirstNumber, Aliases, Acc); -live_interval_blk_1([#b_set{op=bs_start_match}=I|Is], FirstNumber, - Aliases0, Acc0) -> + live_interval_blk_1(Is, FirstNumber, Acc); +live_interval_blk_1([#b_set{op=bs_start_match}=I|Is], + FirstNumber, Acc0) -> N = beam_ssa:get_anno(n, I), - #b_set{dst=#b_var{name=Dst}} = I, + #b_set{dst=Dst} = I, Acc1 = [{Dst,{def,N}}|Acc0], - Aliases = case beam_ssa:get_anno(reuse_for_context, I) of - true -> - #b_set{args=[#b_var{name=Src}]} = I, - [{Dst,Src}|Aliases0]; - false -> - Aliases0 - end, Acc = [{V,{use,N}} || V <- beam_ssa:used(I)] ++ Acc1, - live_interval_blk_1(Is, FirstNumber, Aliases, Acc); -live_interval_blk_1([I|Is], FirstNumber, Aliases, Acc0) -> + live_interval_blk_1(Is, FirstNumber, Acc); +live_interval_blk_1([I|Is], FirstNumber, Acc0) -> N = beam_ssa:get_anno(n, I), Acc1 = case I of - #b_set{dst=#b_var{name=Dst}} -> + #b_set{dst=Dst} -> [{Dst,{def,N}}|Acc0]; _ -> Acc0 end, Used = beam_ssa:used(I), Acc = [{V,{use,N}} || V <- Used] ++ Acc1, - live_interval_blk_1(Is, FirstNumber, Aliases, Acc); -live_interval_blk_1([], _FirstNumber, Aliases, Acc) -> - {Acc,Aliases}. + live_interval_blk_1(Is, FirstNumber, Acc); +live_interval_blk_1([], _FirstNumber, Acc) -> + Acc. %% first_number([#b_set{}]) -> InstructionNumber. %% Return the number for the first instruction for the block. @@ -1621,7 +1959,7 @@ first_number([], Last) -> update_successors([L|Ls], Pred, Blocks, LiveMap, Live0) -> Live1 = ordsets:union(Live0, get_live(L, LiveMap)), - #b_blk{is=Is} = maps:get(L, Blocks), + #b_blk{is=Is} = map_get(L, Blocks), Live = update_live_phis(Is, Pred, Live1), update_successors(Ls, Pred, Blocks, LiveMap, Live); update_successors([], _, _, _, Live) -> Live. @@ -1632,9 +1970,9 @@ get_live(L, LiveMap) -> #{} -> [] end. -update_live_phis([#b_set{op=phi,dst=#b_var{name=Killed},args=Args}|Is], +update_live_phis([#b_set{op=phi,dst=Killed,args=Args}|Is], Pred, Live0) -> - Used = [V || {#b_var{name=V},L} <- Args, L =:= Pred], + Used = [V || {#b_var{}=V,L} <- Args, L =:= Pred], Live1 = ordsets:union(ordsets:from_list(Used), Live0), Live = ordsets:del_element(Killed, Live1), update_live_phis(Is, Pred, Live); @@ -1647,7 +1985,7 @@ update_live_phis(_, _, Live) -> Live. %% reserve_yregs(St0) -> St. %% In each block that allocates a stack frame, insert instructions %% that copy variables that must be in Y registers (given by -%% YRegisters) to new variables. +%% the `yregs` annotation) to new variables. %% %% Also allocate specific Y registers for try and catch tags. %% The outermost try/catch tag is placed in y0, any directly @@ -1659,7 +1997,7 @@ reserve_yregs(#st{frames=Frames}=St0) -> foldl(fun reserve_yregs_1/2, St0, Frames). reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) -> - Blk = maps:get(L, Blocks0), + Blk = map_get(L, Blocks0), Yregs = beam_ssa:get_anno(yregs, Blk), {Def,Used} = beam_ssa:def_used([L], Blocks0), UsedYregs = ordsets:intersection(Yregs, Used), @@ -1685,7 +2023,7 @@ reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) -> reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0); false -> Seen1 = gb_sets:insert(L, Seen0), - #b_blk{is=Is} = Blk = maps:get(L, Blocks), + #b_blk{is=Is} = Blk = map_get(L, Blocks), Active0 = get_active(L, ActMap0), Active = reserve_try_tags_is(Is, Active0), Successors = beam_ssa:successors(Blk), @@ -1702,10 +2040,10 @@ get_active(L, ActMap) -> #{} -> #{} end. -reserve_try_tags_is([#b_set{op=new_try_tag,dst=#b_var{name=V}}|Is], Active) -> +reserve_try_tags_is([#b_set{op=new_try_tag,dst=V}|Is], Active) -> N = map_size(Active), reserve_try_tags_is(Is, Active#{V=>N}); -reserve_try_tags_is([#b_set{op=kill_try_tag,args=[#b_var{name=Tag}]}|Is], Active) -> +reserve_try_tags_is([#b_set{op=kill_try_tag,args=[Tag]}|Is], Active) -> reserve_try_tags_is(Is, maps:remove(Tag, Active)); reserve_try_tags_is([_|Is], Active) -> reserve_try_tags_is(Is, Active); @@ -1725,17 +2063,15 @@ update_act_map([], _, ActMap) -> ActMap. rename_vars([], _, Blocks, Count) -> {[],Blocks,Count}; rename_vars(Vs, L, Blocks0, Count0) -> - {NewVs,Count} = new_var_names(Vs, Count0), - NewVars = [#b_var{name=V} || V <- NewVs], + {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Vs], Count0), Ren = zip(Vs, NewVars), Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0), - #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks1), - CopyIs = [#b_set{op=copy,dst=New,args=[#b_var{name=Old}]} || - {Old,New} <- Ren], + #b_blk{is=Is0} = Blk0 = map_get(L, Blocks1), + CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren], Is = insert_after_phis(Is0, CopyIs), Blk = Blk0#b_blk{is=Is}, - Blocks = maps:put(L, Blk, Blocks1), - {NewVs,Blocks,Count}. + Blocks = Blocks1#{L:=Blk}, + {NewVars,Blocks,Count}. insert_after_phis([#b_set{op=phi}=I|Is], InsertIs) -> [I|insert_after_phis(Is, InsertIs)]; @@ -1756,7 +2092,7 @@ frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) -> frame_size_1(L, Regs, Blocks0) -> Def = beam_ssa:def([L], Blocks0), - Yregs0 = [maps:get(V, Regs) || V <- Def, is_yreg(maps:get(V, Regs))], + Yregs0 = [map_get(V, Regs) || V <- Def, is_yreg(map_get(V, Regs))], Yregs = ordsets:from_list(Yregs0), FrameSize = length(ordsets:from_list(Yregs)), if @@ -1768,17 +2104,17 @@ frame_size_1(L, Regs, Blocks0) -> true -> ok end, - Blk0 = maps:get(L, Blocks0), + Blk0 = map_get(L, Blocks0), Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0), %% Insert an annotation for frame deallocation on %% each #b_ret{}. - Blocks = maps:put(L, Blk, Blocks0), + Blocks = Blocks0#{L:=Blk}, Reachable = beam_ssa:rpo([L], Blocks), frame_deallocate(Reachable, FrameSize, Blocks). frame_deallocate([L|Ls], Size, Blocks0) -> - Blk0 = maps:get(L, Blocks0), + Blk0 = map_get(L, Blocks0), Blk = case Blk0 of #b_blk{last=#b_ret{}=Ret0} -> Ret = beam_ssa:add_anno(deallocate, Size, Ret0), @@ -1786,7 +2122,7 @@ frame_deallocate([L|Ls], Size, Blocks0) -> #b_blk{} -> Blk0 end, - Blocks = maps:put(L, Blk, Blocks0), + Blocks = Blocks0#{L:=Blk}, frame_deallocate(Ls, Size, Blocks); frame_deallocate([], _, Blocks) -> Blocks. @@ -1799,7 +2135,7 @@ frame_deallocate([], _, Blocks) -> Blocks. turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) -> Regs1 = foldl(fun(L, A) -> - Blk = maps:get(L, Blocks), + Blk = map_get(L, Blocks), FrameSize = beam_ssa:get_anno(frame_size, Blk), Def = beam_ssa:def([L], Blocks), [turn_yregs_1(Def, FrameSize, Regs0)|A] @@ -1808,7 +2144,7 @@ turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) -> St#st{regs=Regs}. turn_yregs_1(Def, FrameSize, Regs) -> - Yregs0 = [{maps:get(V, Regs),V} || V <- Def, is_yreg(maps:get(V, Regs))], + Yregs0 = [{map_get(V, Regs),V} || V <- Def, is_yreg(map_get(V, Regs))], Yregs1 = rel2fam(Yregs0), FrameSize = length(Yregs1), Yregs2 = [{{y,FrameSize-Y-1},Vs} || {{y,Y},Vs} <- Yregs1], @@ -1842,7 +2178,7 @@ reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) -> Res = maps:from_list(Res3), St#st{res=reserve_xregs(Blocks, Res)}. -reserve_arg_regs([#b_var{name=Arg}|Is], N, Acc) -> +reserve_arg_regs([#b_var{}=Arg|Is], N, Acc) -> reserve_arg_regs(Is, N+1, [{Arg,{x,N}}|Acc]); reserve_arg_regs([], _, Acc) -> Acc. @@ -1854,25 +2190,42 @@ reserve_zregs(Blocks, Intervals, Res) -> end, beam_ssa:fold_rpo(F, [0], Res, Blocks). +reserve_zreg([#b_set{op=Op,dst=Dst}], + #b_br{bool=Dst}, _ShortLived, A) when Op =:= call; + Op =:= get_tuple_element -> + %% If type optimization has determined that the result of these + %% instructions can be used directly in a branch, we must avoid reserving a + %% z register or code generation will fail. + A; reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}, - #b_set{op={bif,'=:='},args=[Dst,Val]}], _Last, ShortLived, A0) -> - case Val of - #b_literal{val=Arity} when Arity bsr 32 =:= 0 -> + #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) -> + case {Val,Last} of + {#b_literal{val=Arity},#b_br{bool=#b_var{}}} when Arity bsr 32 =:= 0 -> %% These two instructions can be combined to a test_arity %% instruction provided that the arity variable is short-lived. reserve_zreg_1(Dst, ShortLived, A0); - _ -> + {_,_} -> + %% Either the arity is too big, or the boolean value is not + %% used in a conditional branch. A0 end; reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}], #b_switch{}, ShortLived, A) -> reserve_zreg_1(Dst, ShortLived, A); -reserve_zreg([#b_set{op=Op,dst=#b_var{name=Dst}}|Is], Last, ShortLived, A0) -> +reserve_zreg([#b_set{op={bif,'xor'}}], _Last, _ShortLived, A) -> + %% There is no short, easy way to rewrite 'xor' to a series of + %% test instructions. + A; +reserve_zreg([#b_set{op={bif,is_record}}], _Last, _ShortLived, A) -> + %% There is no short, easy way to rewrite is_record/2 to a series of + %% test instructions. + A; +reserve_zreg([#b_set{op=Op,dst=Dst}|Is], Last, ShortLived, A0) -> IsZReg = case Op of - context_to_binary -> true; bs_match_string -> true; - bs_restore -> true; bs_save -> true; + bs_restore -> true; + bs_set_position -> true; {float,clearerror} -> true; kill_try_tag -> true; landingpad -> true; @@ -1893,7 +2246,7 @@ reserve_zreg([], #b_br{bool=Bool}, ShortLived, A) -> reserve_zreg_1(Bool, ShortLived, A); reserve_zreg([], _, _, A) -> A. -reserve_zreg_1(#b_var{name=V}, ShortLived, A) -> +reserve_zreg_1(#b_var{}=V, ShortLived, A) -> case cerl_sets:is_element(V, ShortLived) of true -> [{V,z}|A]; false -> A @@ -1906,7 +2259,7 @@ reserve_fregs(Blocks, Res) -> end, beam_ssa:fold_rpo(F, [0], Res, Blocks). -reserve_freg([#b_set{op={float,Op},dst=#b_var{name=V}}|Is], Res) -> +reserve_freg([#b_set{op={float,Op},dst=V}|Is], Res) -> case Op of get -> reserve_freg(Is, Res); @@ -1931,23 +2284,95 @@ reserve_freg([], Res) -> Res. %% will allocate the lowest free X register for the variable. reserve_xregs(Blocks, Res) -> - F = fun(L, #b_blk{is=Is,last=Last}, R) -> - {Xs0,Used0} = reserve_terminator(L, Last, Blocks, R), - reserve_xregs_is(reverse(Is), R, Xs0, Used0) - end, - beam_ssa:fold_po(F, Res, Blocks). - -reserve_xregs_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args}=I|Is], Res0, Xs0, Used0) -> - Xs1 = case is_gc_safe(I) of - true -> - Xs0; - false -> - %% There may be a garbage collection after executing this - %% instruction. We will need prune the list of preferred - %% X registers. - res_xregs_prune(Xs0, Used0, Res0) - end, - Res = reserve_xreg(Dst, Xs1, Res0), + Ls = reverse(beam_ssa:rpo(Blocks)), + reserve_xregs(Ls, Blocks, #{}, Res). + +reserve_xregs([L|Ls], Blocks, XsMap0, Res0) -> + #b_blk{anno=Anno,is=Is0,last=Last} = map_get(L, Blocks), + + %% Calculate mapping from variable name to the preferred + %% register. + Xs0 = reserve_terminator(L, Is0, Last, Blocks, XsMap0, Res0), + + %% We need to figure out where the code generator will + %% place instructions that will do a garbage collection. + %% Insert 'gc' markers as pseudo-instructions in the + %% instruction sequence. + Is1 = reverse(Is0), + Is2 = res_place_gc_instrs(Is1, []), + Is = res_place_allocate(Anno, Is2), + + %% Add register hints for variables that are defined + %% in the (reversed) instruction sequence. + {Res,Xs} = reserve_xregs_is(Is, Res0, Xs0, []), + + XsMap = XsMap0#{L=>Xs}, + reserve_xregs(Ls, Blocks, XsMap, Res); +reserve_xregs([], _, _, Res) -> Res. + +%% Insert explicit 'gc' markers points where there will +%% be a garbage collection. (Note that the instruction +%% sequence passed to this function is reversed.) + +res_place_gc_instrs([#b_set{op=phi}=I|Is], Acc) -> + res_place_gc_instrs(Is, [I|Acc]); +res_place_gc_instrs([#b_set{op=Op}=I|Is], Acc) + when Op =:= call; Op =:= make_fun -> + case Acc of + [] -> + res_place_gc_instrs(Is, [I|Acc]); + [GC|_] when GC =:= gc; GC =:= test_heap -> + res_place_gc_instrs(Is, [I,gc|Acc]); + [_|_] -> + res_place_gc_instrs(Is, [I,gc|Acc]) + end; +res_place_gc_instrs([#b_set{op=Op,args=Args}=I|Is], Acc0) -> + case beam_ssa_codegen:classify_heap_need(Op, Args) of + neutral -> + case Acc0 of + [test_heap|Acc] -> + res_place_gc_instrs(Is, [test_heap,I|Acc]); + Acc -> + res_place_gc_instrs(Is, [I|Acc]) + end; + {put,_} -> + case Acc0 of + [test_heap|Acc] -> + res_place_gc_instrs(Is, [test_heap,I|Acc]); + Acc -> + res_place_gc_instrs(Is, [test_heap,I|Acc]) + end; + _ -> + res_place_gc_instrs(Is, [gc,I|Acc0]) + end; +res_place_gc_instrs([], Acc) -> + %% Reverse and replace 'test_heap' markers with 'gc'. + %% (The distinction is no longer useful.) + res_place_gc_instrs_rev(Acc, []). + +res_place_gc_instrs_rev([test_heap|Is], [gc|_]=Acc) -> + res_place_gc_instrs_rev(Is, Acc); +res_place_gc_instrs_rev([test_heap|Is], Acc) -> + res_place_gc_instrs_rev(Is, [gc|Acc]); +res_place_gc_instrs_rev([gc|Is], [gc|_]=Acc) -> + res_place_gc_instrs_rev(Is, Acc); +res_place_gc_instrs_rev([I|Is], Acc) -> + res_place_gc_instrs_rev(Is, [I|Acc]); +res_place_gc_instrs_rev([], Acc) -> Acc. + +res_place_allocate(#{yregs:=_}, Is) -> + %% There will be an 'allocate' instruction inserted here. + Is ++ [gc]; +res_place_allocate(#{}, Is) -> Is. + +reserve_xregs_is([gc|Is], Res, Xs0, Used) -> + %% At this point, the code generator will place an instruction + %% that does a garbage collection. We must prune the remembered + %% registers. + Xs = res_xregs_prune(Xs0, Used, Res), + reserve_xregs_is(Is, Res, Xs, Used); +reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) -> + Res = reserve_xreg(Dst, Xs0, Res0), Used1 = ordsets:union(Used0, beam_ssa:used(I)), Used = ordsets:del_element(Dst, Used1), case Op of @@ -1958,28 +2383,74 @@ reserve_xregs_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args}=I|Is], Res0, Xs0, Xs = reserve_call_args(tl(Args)), reserve_xregs_is(Is, Res, Xs, Used); _ -> - reserve_xregs_is(Is, Res, Xs1, Used) + reserve_xregs_is(Is, Res, Xs0, Used) end; -reserve_xregs_is([], Res, _Xs, _Used) -> Res. - -reserve_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}, Blocks, Res) -> - case maps:get(Succ, Blocks) of +reserve_xregs_is([], Res, Xs, _Used) -> + {Res,Xs}. + +%% Pick up register hints from the successors of this blocks. +reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?BADARG_BLOCK}, + _Blocks, XsMap, _Res) -> + %% We know that no variables are used at ?BADARG_BLOCK, so + %% any register hints from the success blocks are safe to use. + map_get(Succ, XsMap); +reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail}, + Blocks, XsMap, Res) when Succ =/= Fail -> + #{Succ:=SuccBlk,Fail:=FailBlk} = Blocks, + case {SuccBlk,FailBlk} of + {#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}, + #b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}} -> + %% Both branches ultimately transfer to the same + %% block (via two blocks with no instructions). + %% Pick up register hints from the phi nodes + %% in the common block. + #{PhiL:=#b_blk{is=PhiIs}} = Blocks, + Xs = res_xregs_from_phi(PhiIs, Succ, Res, #{}), + res_xregs_from_phi(PhiIs, Fail, Res, Xs); + {_,_} when Is =/= [] -> + case last(Is) of + #b_set{op=succeeded,args=[Arg]} -> + %% We know that Arg will not be used at the failure + %% label, so we can pick up register hints from the + %% success label. + Br = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ}, + case reserve_terminator(L, [], Br, Blocks, XsMap, Res) of + #{Arg:=Reg} -> #{Arg=>Reg}; + #{} -> #{} + end; + _ -> + %% Register hints from the success block may not + %% be safe at the failure block, and vice versa. + #{} + end; + {_,_} -> + %% Register hints from the success block may not + %% be safe at the failure block, and vice versa. + #{} + end; +reserve_terminator(L, Is, #b_br{bool=#b_literal{val=true},succ=Succ}, + Blocks, XsMap, Res) -> + case map_get(Succ, Blocks) of #b_blk{is=[],last=Last} -> - reserve_terminator(Succ, Last, Blocks, Res); - #b_blk{is=[_|_]=Is} -> - {res_xregs_from_phi(Is, L, Res, #{}),[]} + reserve_terminator(Succ, Is, Last, Blocks, XsMap, Res); + #b_blk{is=[_|_]=PhiIs} -> + res_xregs_from_phi(PhiIs, L, Res, #{}) end; -reserve_terminator(_, Last, _, _) -> - {#{},beam_ssa:used(Last)}. +reserve_terminator(_, _, _, _, _, _) -> #{}. -res_xregs_from_phi([#b_set{op=phi,dst=#b_var{name=Dst},args=Args}|Is], +%% Pick up a reservation from a phi node. +res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is], Pred, Res, Acc) -> - case [V || {#b_var{name=V},L} <- Args, L =:= Pred] of + case [V || {#b_var{}=V,L} <- Args, L =:= Pred] of [] -> + %% The value of the phi node for this predecessor + %% is a literal. Nothing to do here. res_xregs_from_phi(Is, Pred, Res, Acc); [V] -> case Res of #{Dst:={prefer,Reg}} -> + %% Try placing V in the same register as for + %% the phi node. res_xregs_from_phi(Is, Pred, Res, Acc#{V=>Reg}); #{Dst:=_} -> res_xregs_from_phi(Is, Pred, Res, Acc) @@ -1990,8 +2461,8 @@ res_xregs_from_phi(_, _, _, Acc) -> Acc. reserve_call_args(Args) -> reserve_call_args(Args, 0, #{}). -reserve_call_args([#b_var{name=Name}|As], X, Xs) -> - reserve_call_args(As, X+1, Xs#{Name=>{x,X}}); +reserve_call_args([#b_var{}=Var|As], X, Xs) -> + reserve_call_args(As, X+1, Xs#{Var=>{x,X}}); reserve_call_args([#b_literal{}|As], X, Xs) -> reserve_call_args(As, X+1, Xs); reserve_call_args([], _, Xs) -> Xs. @@ -1999,12 +2470,12 @@ reserve_call_args([], _, Xs) -> Xs. reserve_xreg(V, Xs, Res) -> case Res of #{V:=_} -> - %% Already reserved. + %% Already reserved (but not as an X register). Res; #{} -> case Xs of #{V:=X} -> - %% Add a hint that a specific X register is + %% Add a hint that this specific X register is %% preferred, unless it is already in use. Res#{V=>{prefer,X}}; #{} -> @@ -2013,23 +2484,15 @@ reserve_xreg(V, Xs, Res) -> end end. -is_gc_safe(#b_set{op=phi}) -> - false; -is_gc_safe(#b_set{op=Op,args=Args}) -> - case beam_ssa_codegen:classify_heap_need(Op, Args) of - neutral -> true; - {put,_} -> true; - _ -> false - end. - %% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs. -%% Prune the list of preferred to only include X registers that -%% are guaranteed to survice a garbage collection. +%% Prune the list of preferred registers, to make sure that +%% there are no "holes" (uninitialized X registers) when +%% invoking the garbage collector. -res_xregs_prune(Xs, Used, Res) -> +res_xregs_prune(Xs, Used, Res) when map_size(Xs) =/= 0 -> %% The number of safe registers is the number of the X registers %% used after this point. The actual number of safe registers may - %% be highter than this number, but this is a conservative safe + %% be higher than this number, but this is a conservative safe %% estimate. NumSafe = foldl(fun(V, N) -> case Res of @@ -2041,83 +2504,8 @@ res_xregs_prune(Xs, Used, Res) -> %% Remove unsafe registers from the list of potential %% preferred registers. - maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs). - -%%% -%%% Remove unsuitable aliases. -%%% -%%% If a binary is matched more than once, we must not put the -%%% the match context in the same register as the binary to -%%% avoid the following situation: -%%% -%%% {test,bs_start_match2,{f,3},1,[{x,0},0],{x,0}}. -%%% . -%%% . -%%% . -%%% {test,bs_start_match2,{f,6},1,[{x,0},0],{x,1}}. %% ILLEGAL! -%%% -%%% The second instruction is illegal because a match context source -%%% is only allowed if source and destination registers are identical. -%%% - -remove_unsuitable_aliases(#st{aliases=[_|_]=Aliases0,ssa=Blocks}=St) -> - R = rem_unsuitable(maps:values(Blocks)), - Unsuitable0 = [V || {V,[_,_|_]} <- rel2fam(R)], - Unsuitable = gb_sets:from_list(Unsuitable0), - Aliases =[P || {_,V}=P <- Aliases0, - not gb_sets:is_member(V, Unsuitable)], - St#st{aliases=Aliases}; -remove_unsuitable_aliases(#st{aliases=[]}=St) -> St. - -rem_unsuitable([#b_blk{is=Is}|Bs]) -> - Vs = [{V,Dst} || - #b_set{op=bs_start_match,dst=#b_var{name=Dst}, - args=[#b_var{name=V}]} <- Is], - Vs ++ rem_unsuitable(Bs); -rem_unsuitable([]) -> []. - -%%% -%%% Merge intervals. -%%% - -merge_intervals(#st{aliases=Aliases0,intervals=Intervals0, - res=Reserved}=St) -> - Aliases1 = [A || A <- Aliases0, - is_suitable_alias(A, Reserved)], - case Aliases1 of - [] -> - St#st{aliases=Aliases1}; - [_|_] -> - Intervals1 = maps:from_list(Intervals0), - {Intervals,Aliases} = - merge_intervals_1(Aliases1, Intervals1, []), - St#st{aliases=Aliases,intervals=Intervals} - end. - -merge_intervals_1([{Alias,V}|Vs], Intervals0, Acc) -> - #{Alias:=Int1,V:=Int2} = Intervals0, - Int3 = lists:merge(Int1, Int2), - Int = merge_intervals_2(Int3), - Intervals1 = maps:remove(Alias, Intervals0), - Intervals = Intervals1#{V:=Int}, - merge_intervals_1(Vs, Intervals, [{Alias,V}|Acc]); -merge_intervals_1([], Intervals, Acc) -> - {maps:to_list(Intervals),Acc}. - -merge_intervals_2([{A1,B1},{A2,B2}|Is]) when A2 =< B1 -> - merge_intervals_2([{min(A1, A2),max(B1, B2)}|Is]); -merge_intervals_2([{_A1,B1}=R|[{A2,_B2}|_]=Is]) when B1 < A2 -> - [R|merge_intervals_2(Is)]; -merge_intervals_2([_]=Is) -> Is. - -is_suitable_alias({V1,V2}, Reserved) -> - #{V1:=Res1,V2:=Res2} = Reserved, - case {Res1,Res2} of - {x,x} -> true; - {x,{x,_}} -> true; - {{x,_},x} -> true; - {_,_} -> false - end. + maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs); +res_xregs_prune(Xs, _Used, _Res) -> Xs. %%% %%% Register allocation using linear scan. @@ -2152,7 +2540,13 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) -> Free = init_free(maps:to_list(Res)), Intervals1 = [init_interval(Int, Res) || Int <- Intervals0], Intervals = sort(Intervals1), - IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end, + IsReserved = fun(#i{reg=Reg}) -> + case Reg of + none -> false; + {prefer,{_,_}} -> false; + {_,_} -> true + end + end, {UnhandledRes,Unhandled} = partition(IsReserved, Intervals), L = #l{unhandled_res=UnhandledRes, unhandled_any=Unhandled,free=Free}, @@ -2160,7 +2554,7 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) -> St#st{regs=maps:from_list(Regs)}. init_interval({V,[{Start,_}|_]=Rs}, Res) -> - Info = maps:get(V, Res), + Info = map_get(V, Res), Pool = case Info of {prefer,{x,_}} -> x; x -> x; @@ -2361,16 +2755,16 @@ free_reg(#i{reg={_,_}=Reg}=I, L) -> update_pool(I, FreeRegs, L). get_pool(#i{pool=Pool}, #l{free=Free}) -> - maps:get(Pool, Free). + map_get(Pool, Free). update_pool(#i{pool=Pool}, New, #l{free=Free0}=L) -> - Free = maps:put(Pool, New, Free0), + Free = Free0#{Pool:=New}, L#l{free=Free}. get_next_free(#i{pool=Pool}, #l{free=Free0}=L0) -> K = {next,Pool}, - N = maps:get(K, Free0), - Free = maps:put(K, N+1, Free0), + N = map_get(K, Free0), + Free = Free0#{K:=N+1}, L = L0#l{free=Free}, if is_integer(Pool) -> {{y,N},L}; @@ -2406,7 +2800,7 @@ are_overlapping_1({_,_}, []) -> false. is_loop_header(L, Blocks) -> %% We KNOW that a loop header must start with a peek_message %% instruction. - case maps:get(L, Blocks) of + case map_get(L, Blocks) of #b_blk{is=[#b_set{op=peek_message}|_]} -> true; _ -> false end. @@ -2417,21 +2811,21 @@ rel2fam(S0) -> sofs:to_external(S). split_phis(Is) -> - partition(fun(#b_set{op=Op}) -> Op =:= phi end, Is). + splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is). is_yreg({y,_}) -> true; is_yreg({x,_}) -> false; is_yreg({z,_}) -> false; is_yreg({fr,_}) -> false. -new_var_names([V0|Vs0], Count0) -> - {V,Count1} = new_var_name(V0, Count0), - {Vs,Count} = new_var_names(Vs0, Count1), +new_vars([Base|Vs0], Count0) -> + {V,Count1} = new_var(Base, Count0), + {Vs,Count} = new_vars(Vs0, Count1), {[V|Vs],Count}; -new_var_names([], Count) -> {[],Count}. +new_vars([], Count) -> {[],Count}. -new_var_name({Base,Int}, Count) -> +new_var({Base,Int}, Count) -> true = is_integer(Int), %Assertion. - {{Base,Count},Count+1}; -new_var_name(Base, Count) -> - {{Base,Count},Count+1}. + {#b_var{name={Base,Count}},Count+1}; +new_var(Base, Count) -> + {#b_var{name={Base,Count}},Count+1}. |