diff options
Diffstat (limited to 'lib/compiler/src/beam_ssa_opt.erl')
-rw-r--r-- | lib/compiler/src/beam_ssa_opt.erl | 443 |
1 files changed, 221 insertions, 222 deletions
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl index 2c898ba6f8..90c0d3cf16 100644 --- a/lib/compiler/src/beam_ssa_opt.erl +++ b/lib/compiler/src/beam_ssa_opt.erl @@ -79,14 +79,12 @@ module(Module, Opts) -> {ok, finish(Module, StMap)}. phase([FuncId | Ids], Ps, StMap, FuncDb0) -> - try - {St, FuncDb} = - compile:run_sub_passes(Ps, {map_get(FuncId, StMap), FuncDb0}), - - phase(Ids, Ps, StMap#{ FuncId => St }, FuncDb) + try compile:run_sub_passes(Ps, {map_get(FuncId, StMap), FuncDb0}) of + {St, FuncDb} -> + phase(Ids, Ps, StMap#{ FuncId => St }, FuncDb) catch Class:Error:Stack -> - #b_local{name=Name,arity=Arity} = FuncId, + #b_local{name=#b_literal{val=Name},arity=Arity} = FuncId, io:fwrite("Function: ~w/~w\n", [Name,Arity]), erlang:raise(Class, Error, Stack) end; @@ -166,15 +164,18 @@ repeated_passes(Opts) -> epilogue_passes(Opts) -> Ps = [?PASS(ssa_opt_type_finish), ?PASS(ssa_opt_float), - ?PASS(ssa_opt_live), %One last time to clean up the - %mess left by the float pass. + ?PASS(ssa_opt_sw), + + %% Run live one more time to clean up after the float and sw + %% passes. + ?PASS(ssa_opt_live), ?PASS(ssa_opt_bsm), ?PASS(ssa_opt_bsm_units), ?PASS(ssa_opt_bsm_shortcut), - ?PASS(ssa_opt_sw), ?PASS(ssa_opt_blockify), ?PASS(ssa_opt_sink), ?PASS(ssa_opt_merge_blocks), + ?PASS(ssa_opt_get_tuple_element), ?PASS(ssa_opt_trim_unreachable)], passes_1(Ps, Opts). @@ -251,22 +252,14 @@ fdb_update(Caller, Callee, FuncDb) -> FuncDb#{ Caller => CallerVertex#func_info{out=Calls}, Callee => CalleeVertex#func_info{in=CalledBy} }. -%% Returns the post-order of all local calls in this module. That is, it starts -%% with the functions that don't call any others and then walks up the call -%% chain. +%% Returns the post-order of all local calls in this module. That is, +%% called functions will be ordered before the functions calling them. %% %% Functions where module-level optimization is disabled are added last in %% arbitrary order. get_call_order_po(StMap, FuncDb) -> - Leaves = maps:fold(fun(Id, #func_info{out=[]}, Acc) -> - [Id | Acc]; - (_, _, Acc) -> - Acc - end, [], FuncDb), - - Order = gco_po_1(sort(Leaves), FuncDb, [], #{}), - + Order = gco_po(FuncDb), Order ++ maps:fold(fun(K, _V, Acc) -> case is_map_key(K, FuncDb) of false -> [K | Acc]; @@ -274,20 +267,23 @@ get_call_order_po(StMap, FuncDb) -> end end, [], StMap). -gco_po_1([Id | Ids], FuncDb, Children, Seen) when not is_map_key(Id, Seen) -> - [Id | gco_po_1(Ids, FuncDb, [Id | Children], Seen#{ Id => true })]; -gco_po_1([_Id | Ids], FuncDb, Children, Seen) -> - gco_po_1(Ids, FuncDb, Children, Seen); -gco_po_1([], FuncDb, [_|_]=Children, Seen) -> - gco_po_1(gco_po_parents(Children, FuncDb), FuncDb, [], Seen); -gco_po_1([], _FuncDb, [], _Seen) -> - []. +gco_po(FuncDb) -> + All = sort(maps:keys(FuncDb)), + {RPO,_} = gco_rpo(All, FuncDb, cerl_sets:new(), []), + reverse(RPO). -gco_po_parents([Child | Children], FuncDb) -> - #{ Child := #func_info{in=Parents}} = FuncDb, - Parents ++ gco_po_parents(Children, FuncDb); -gco_po_parents([], _FuncDb) -> - []. +gco_rpo([Id|Ids], FuncDb, Seen0, Acc0) -> + case cerl_sets:is_element(Id, Seen0) of + true -> + gco_rpo(Ids, FuncDb, Seen0, Acc0); + false -> + #func_info{out=Successors} = map_get(Id, FuncDb), + Seen1 = cerl_sets:add_element(Id, Seen0), + {Acc,Seen} = gco_rpo(Successors, FuncDb, Seen1, Acc0), + gco_rpo(Ids, FuncDb, Seen, [Id|Acc]) + end; +gco_rpo([], _, Seen, Acc) -> + {Acc,Seen}. %%% %%% Trivial sub passes. @@ -364,7 +360,7 @@ ssa_opt_coalesce_phis({#st{ssa=Blocks0}=St, FuncDb}) -> {St#st{ssa=Blocks}, FuncDb}. c_phis_1([L|Ls], Blocks0) -> - case maps:get(L, Blocks0) of + case map_get(L, Blocks0) of #b_blk{is=[#b_set{op=phi}|_]}=Blk -> Blocks = c_phis_2(L, Blk, Blocks0), c_phis_1(Ls, Blocks); @@ -403,7 +399,7 @@ c_phis_args_1([{Var,Pred}|As], Blocks) -> c_phis_args_1([], _Blocks) -> none. c_get_pred_vars(Var, Pred, Blocks) -> - case maps:get(Pred, Blocks) of + case map_get(Pred, Blocks) of #b_blk{is=[#b_set{op=phi,dst=Var,args=Args}]} -> {Var,Pred,Args}; #b_blk{} -> @@ -424,7 +420,7 @@ c_rewrite_phi([A|As], Info) -> c_rewrite_phi([], _Info) -> []. c_fix_branches([{_,Pred}|As], L, Blocks0) -> - #b_blk{last=Last0} = Blk0 = maps:get(Pred, Blocks0), + #b_blk{last=Last0} = Blk0 = map_get(Pred, Blocks0), #b_br{bool=#b_literal{val=true}} = Last0, %Assertion. Last = Last0#b_br{bool=#b_literal{val=true},succ=L,fail=L}, Blk = Blk0#b_blk{last=Last}, @@ -687,6 +683,14 @@ record_opt_is([#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}=Set], no -> [Set] end; +record_opt_is([I|Is]=Is0, #b_br{bool=Bool}=Last, Blocks) -> + case is_tagged_tuple_1(Is0, Last, Blocks) of + {yes,_Fail,Tuple,Arity,Tag} -> + Args = [Tuple,Arity,Tag], + [I#b_set{op=is_tagged_tuple,dst=Bool,args=Args}]; + no -> + [I|record_opt_is(Is, Last, Blocks)] + end; record_opt_is([I|Is], Last, Blocks) -> [I|record_opt_is(Is, Last, Blocks)]; record_opt_is([], _Last, _Blocks) -> []. @@ -694,29 +698,30 @@ record_opt_is([], _Last, _Blocks) -> []. is_tagged_tuple(#b_var{}=Tuple, Bool, #b_br{bool=Bool,succ=Succ,fail=Fail}, Blocks) -> - SuccBlk = maps:get(Succ, Blocks), - is_tagged_tuple_1(SuccBlk, Tuple, Fail, Blocks); + #b_blk{is=Is,last=Last} = map_get(Succ, Blocks), + case is_tagged_tuple_1(Is, Last, Blocks) of + {yes,Fail,Tuple,Arity,Tag} -> + {yes,Arity,Tag}; + _ -> + no + end; is_tagged_tuple(_, _, _, _) -> no. -is_tagged_tuple_1(#b_blk{is=Is,last=Last}, Tuple, Fail, Blocks) -> - case Is of - [#b_set{op={bif,tuple_size},dst=ArityVar, - args=[#b_var{}=Tuple]}, - #b_set{op={bif,'=:='}, - dst=Bool, - args=[ArityVar, #b_literal{val=ArityVal}=Arity]}] - when is_integer(ArityVal) -> - case Last of - #b_br{bool=Bool,succ=Succ,fail=Fail} -> - SuccBlk = maps:get(Succ, Blocks), - case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of - no -> - no; - {yes,Tag} -> - {yes,Arity,Tag} - end; - _ -> - no +is_tagged_tuple_1(Is, Last, Blocks) -> + case {Is,Last} of + {[#b_set{op={bif,tuple_size},dst=ArityVar, + args=[#b_var{}=Tuple]}, + #b_set{op={bif,'=:='}, + dst=Bool, + args=[ArityVar, #b_literal{val=ArityVal}=Arity]}], + #b_br{bool=Bool,succ=Succ,fail=Fail}} + when is_integer(ArityVal) -> + SuccBlk = map_get(Succ, Blocks), + case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of + no -> + no; + {yes,Tag} -> + {yes,Fail,Tuple,Arity,Tag} end; _ -> no @@ -759,7 +764,7 @@ ssa_opt_cse({#st{ssa=Linear}=St, FuncDb}) -> {St#st{ssa=cse(Linear, #{}, M)}, FuncDb}. cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) -> - Es0 = maps:get(L, M0), + Es0 = map_get(L, M0), {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []), Last = sub(Last0, Sub), M = cse_successors(Is1, Blk, Es, M0), @@ -854,6 +859,7 @@ cse_expr(#b_set{op=Op,args=Args}=I) -> cse_suitable(#b_set{op=get_hd}) -> true; cse_suitable(#b_set{op=get_tl}) -> true; cse_suitable(#b_set{op=put_list}) -> true; +cse_suitable(#b_set{op=get_tuple_element}) -> true; cse_suitable(#b_set{op=put_tuple}) -> true; cse_suitable(#b_set{op={bif,tuple_size}}) -> %% Doing CSE for tuple_size/1 can prevent the @@ -905,6 +911,11 @@ ssa_opt_float({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) -> {Linear,Count} = float_opt(Linear0, Count0, Fs), {St#st{ssa=Linear,cnt=Count}, FuncDb}. +float_blk_is_in_guard(#b_blk{last=#b_br{fail=F}}, #fs{non_guards=NonGuards}) -> + not gb_sets:is_member(F, NonGuards); +float_blk_is_in_guard(#b_blk{}, #fs{}) -> + false. + float_non_guards([{L,#b_blk{is=Is}}|Bs]) -> case Is of [#b_set{op=landingpad}|_] -> @@ -914,21 +925,18 @@ float_non_guards([{L,#b_blk{is=Is}}|Bs]) -> end; float_non_guards([]) -> [?BADARG_BLOCK]. -float_opt([{L,#b_blk{last=#b_br{fail=F}}=Blk}|Bs0], - Count0, #fs{non_guards=NonGuards}=Fs) -> - case gb_sets:is_member(F, NonGuards) of +float_opt([{L,Blk}|Bs0], Count0, Fs) -> + case float_blk_is_in_guard(Blk, Fs) of true -> - %% This block is not inside a guard. - %% We can do the optimization. - float_opt_1(L, Blk, Bs0, Count0, Fs); - false -> %% This block is inside a guard. Don't do %% any floating point optimizations. {Bs,Count} = float_opt(Bs0, Count0, Fs), - {[{L,Blk}|Bs],Count} + {[{L,Blk}|Bs],Count}; + false -> + %% This block is not inside a guard. + %% We can do the optimization. + float_opt_1(L, Blk, Bs0, Count0, Fs) end; -float_opt([{L,Blk}|Bs], Count, Fs) -> - float_opt_1(L, Blk, Bs, Count, Fs); float_opt([], Count, _Fs) -> {[],Count}. @@ -1004,10 +1012,14 @@ float_conv([{L,#b_blk{is=Is0}=Blk0}|Bs0], Fail, Count0) -> float_maybe_flush(Blk0, #fs{s=cleared,fail=Fail,bs=Blocks}=Fs0, Count0) -> #b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0, - #b_blk{is=Is} = maps:get(Succ, Blocks), + + %% If the success block starts with a floating point operation, we can + %% defer flushing to that block as long as it isn't a guard. + #b_blk{is=Is} = SuccBlk = map_get(Succ, Blocks), + SuccIsGuard = float_blk_is_in_guard(SuccBlk, Fs0), + case Is of - [#b_set{anno=#{float_op:=_}}|_] -> - %% The next operation is also a floating point operation. + [#b_set{anno=#{float_op:=_}}|_] when not SuccIsGuard -> %% No flush needed. {[],Blk0,Fs0,Count0}; _ -> @@ -1151,25 +1163,28 @@ ssa_opt_live({#st{ssa=Linear0}=St, FuncDb}) -> live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) -> Blk1 = beam_ssa_share:block(Blk0, Blocks), Successors = beam_ssa:successors(Blk1), - Live0 = live_opt_succ(Successors, L, LiveMap0), + Live0 = live_opt_succ(Successors, L, LiveMap0, gb_sets:empty()), {Blk,Live} = live_opt_blk(Blk1, Live0), LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0), live_opt(Bs, LiveMap, Blocks#{L:=Blk}); live_opt([], _, Acc) -> Acc. -live_opt_succ([S|Ss], L, LiveMap) -> - Live0 = live_opt_succ(Ss, L, LiveMap), +live_opt_succ([S|Ss], L, LiveMap, Live0) -> Key = {S,L}, case LiveMap of #{Key:=Live} -> - gb_sets:union(Live, Live0); + %% The successor has a phi node, and the value for + %% this block in the phi node is a variable. + live_opt_succ(Ss, L, LiveMap, gb_sets:union(Live, Live0)); #{S:=Live} -> - gb_sets:union(Live, Live0); + %% No phi node in the successor, or the value for + %% this block in the phi node is a literal. + live_opt_succ(Ss, L, LiveMap, gb_sets:union(Live, Live0)); #{} -> - Live0 + %% A peek_message block which has not been processed yet. + live_opt_succ(Ss, L, LiveMap, Live0) end; -live_opt_succ([], _, _) -> - gb_sets:empty(). +live_opt_succ([], _, _, Acc) -> Acc. live_opt_phis(Is, L, Live0, LiveMap0) -> LiveMap = LiveMap0#{L=>Live0}, @@ -1220,7 +1235,7 @@ live_opt_is([#b_set{op=succeeded,dst=SuccDst=SuccDstVar, case gb_sets:is_member(SuccDst, Live0) of true -> Live1 = gb_sets:add(Dst, Live0), - Live = gb_sets:delete_any(SuccDst, Live1), + Live = gb_sets:delete(SuccDst, Live1), live_opt_is([I|Is], Live, [SuccI|Acc]); false -> live_opt_is([I|Is], Live0, Acc) @@ -1231,7 +1246,7 @@ live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) -> case gb_sets:is_member(Dst, Live0) of true -> Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))), - Live = gb_sets:delete_any(Dst, Live1), + Live = gb_sets:delete(Dst, Live1), live_opt_is(Is, Live, [I|Acc]); false -> case beam_ssa:no_side_effect(I) of @@ -1375,7 +1390,7 @@ bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) -> case {Is,Last} of {[#b_set{op=bs_test_tail,dst=Bool,args=[Ctx,#b_literal{val=Bits0}]}], #b_br{bool=Bool,fail=Fail}} -> - Bits = Bits0 + maps:get(Ctx, PosMap0), + Bits = Bits0 + map_get(Ctx, PosMap0), bsm_positions(Bs, PosMap#{L=>{Bits,Fail}}); {_,_} -> bsm_positions(Bs, PosMap) @@ -1467,7 +1482,7 @@ bsm_units_skip_1([#b_set{op=bs_match, Block0, Units) -> [#b_set{op=succeeded,dst=Bool,args=[New]}] = Test, %Assertion. #b_br{bool=Bool} = Last0 = Block0#b_blk.last, %Assertion. - CtxUnit = maps:get(Ctx, Units), + CtxUnit = map_get(Ctx, Units), if CtxUnit rem OpUnit =:= 0 -> Is = takewhile(fun(I) -> I =/= Skip end, Block0#b_blk.is), @@ -1479,7 +1494,7 @@ bsm_units_skip_1([#b_set{op=bs_match, end; bsm_units_skip_1([#b_set{op=bs_match,dst=New,args=Args}|_], Block, Units) -> [_,Ctx|_] = Args, - CtxUnit = maps:get(Ctx, Units), + CtxUnit = map_get(Ctx, Units), OpUnit = bsm_op_unit(Args), {Block, Units#{ New => gcd(OpUnit, CtxUnit) }}; bsm_units_skip_1([_I | Is], Block, Units) -> @@ -1507,23 +1522,23 @@ bsm_op_unit(_) -> %% may differ between them, so we can only keep the information that is common %% to all paths. bsm_units_join(Lbl, MapA, UnitMaps0) when is_map_key(Lbl, UnitMaps0) -> - MapB = maps:get(Lbl, UnitMaps0), + MapB = map_get(Lbl, UnitMaps0), Merged = if map_size(MapB) =< map_size(MapA) -> bsm_units_join_1(maps:keys(MapB), MapA, MapB); map_size(MapB) > map_size(MapA) -> bsm_units_join_1(maps:keys(MapA), MapB, MapA) end, - maps:put(Lbl, Merged, UnitMaps0); + UnitMaps0#{Lbl := Merged}; bsm_units_join(Lbl, MapA, UnitMaps0) when MapA =/= #{} -> - maps:put(Lbl, MapA, UnitMaps0); + UnitMaps0#{Lbl => MapA}; bsm_units_join(_Lbl, _MapA, UnitMaps0) -> UnitMaps0. bsm_units_join_1([Key | Keys], Left, Right) when is_map_key(Key, Left) -> - UnitA = maps:get(Key, Left), - UnitB = maps:get(Key, Right), - bsm_units_join_1(Keys, Left, maps:put(Key, gcd(UnitA, UnitB), Right)); + UnitA = map_get(Key, Left), + UnitB = map_get(Key, Right), + bsm_units_join_1(Keys, Left, Right#{Key := gcd(UnitA, UnitB)}); bsm_units_join_1([Key | Keys], Left, Right) -> bsm_units_join_1(Keys, Left, maps:remove(Key, Right)); bsm_units_join_1([], _MapA, Right) -> @@ -1834,12 +1849,16 @@ opt_tup_size_is([], _, _, _Acc) -> none. %%% ssa_opt_sw({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) -> - {Linear,Count} = opt_sw(Linear0, #{}, Count0, []), + {Linear,Count} = opt_sw(Linear0, Count0, []), {St#st{ssa=Linear,cnt=Count}, FuncDb}. -opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Last0}=Blk0}|Bs], Phis0, Count0, Acc) -> - Phis = opt_sw_phis(Is, Phis0), - case opt_sw_last(Last0, Phis) of +opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Sw0}=Blk0}|Bs], Count0, Acc) -> + %% Ensure that no label in the switch list is the same + %% as the failure label. + #b_switch{fail=Fail,list=List0} = Sw0, + List = [{Val,Lbl} || {Val,Lbl} <- List0, Lbl =/= Fail], + Sw1 = beam_ssa:normalize(Sw0#b_switch{list=List}), + case Sw1 of #b_switch{arg=Arg,fail=Fail,list=[{Lit,Lbl}]} -> %% Rewrite a single value switch to a br. Bool = #b_var{name={'@ssa_bool',Count0}}, @@ -1847,7 +1866,7 @@ opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Last0}=Blk0}|Bs], Phis0, Count0, Acc) - IsEq = #b_set{op={bif,'=:='},dst=Bool,args=[Arg,Lit]}, Br = #b_br{bool=Bool,succ=Lbl,fail=Fail}, Blk = Blk0#b_blk{is=Is++[IsEq],last=Br}, - opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]); + opt_sw(Bs, Count, [{L,Blk}|Acc]); #b_switch{arg=Arg,fail=Fail, list=[{#b_literal{val=B1},Lbl},{#b_literal{val=B2},Lbl}]} when B1 =:= not B2 -> @@ -1857,71 +1876,18 @@ opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Last0}=Blk0}|Bs], Phis0, Count0, Acc) - IsBool = #b_set{op={bif,is_boolean},dst=Bool,args=[Arg]}, Br = #b_br{bool=Bool,succ=Lbl,fail=Fail}, Blk = Blk0#b_blk{is=Is++[IsBool],last=Br}, - opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]); - Last0 -> - opt_sw(Bs, Phis, Count0, [{L,Blk0}|Acc]); - Last -> - Blk = Blk0#b_blk{last=Last}, - opt_sw(Bs, Phis, Count0, [{L,Blk}|Acc]) + opt_sw(Bs, Count, [{L,Blk}|Acc]); + Sw0 -> + opt_sw(Bs, Count0, [{L,Blk0}|Acc]); + Sw -> + Blk = Blk0#b_blk{last=Sw}, + opt_sw(Bs, Count0, [{L,Blk}|Acc]) end; -opt_sw([{L,#b_blk{is=Is}=Blk}|Bs], Phis0, Count, Acc) -> - Phis = opt_sw_phis(Is, Phis0), - opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]); -opt_sw([], _Phis, Count, Acc) -> +opt_sw([{L,#b_blk{}=Blk}|Bs], Count, Acc) -> + opt_sw(Bs, Count, [{L,Blk}|Acc]); +opt_sw([], Count, Acc) -> {reverse(Acc),Count}. -opt_sw_phis([#b_set{op=phi,dst=Dst,args=Args}|Is], Phis) -> - case opt_sw_literals(Args, []) of - error -> - opt_sw_phis(Is, Phis); - Literals -> - opt_sw_phis(Is, Phis#{Dst=>Literals}) - end; -opt_sw_phis(_, Phis) -> Phis. - -opt_sw_last(#b_switch{arg=Arg,fail=Fail,list=List0}=Sw0, Phis) -> - case Phis of - #{Arg:=Values0} -> - Values = gb_sets:from_list(Values0), - - %% Prune the switch list to only contain the possible values. - List1 = [P || {Lit,_}=P <- List0, gb_sets:is_member(Lit, Values)], - - %% Now test whether the failure label can ever be reached. - Sw = case gb_sets:size(Values) =:= length(List1) of - true -> - %% The switch list has the same number of values as the phi node. - %% The values must be the same, because the values that were not - %% possible were pruned from the switch list. Therefore, the - %% failure label can't possibly be reached, and we can choose a - %% a new failure label by picking a value from the list. - case List1 of - [{#b_literal{},Lbl}|List] -> - Sw0#b_switch{fail=Lbl,list=List}; - [] -> - Sw0#b_switch{list=List1} - end; - false -> - %% There are some values in the phi node that are not in the - %% switch list; thus, the failure label can still be reached. - Sw0 - end, - beam_ssa:normalize(Sw); - #{} -> - %% Ensure that no label in the switch list is the same - %% as the failure label. - List = [{Val,Lbl} || {Val,Lbl} <- List0, Lbl =/= Fail], - Sw = Sw0#b_switch{list=List}, - beam_ssa:normalize(Sw) - end. - -opt_sw_literals([{#b_literal{}=Lit,_}|T], Acc) -> - opt_sw_literals(T, [Lit|Acc]); -opt_sw_literals([_|_], _Acc) -> - error; -opt_sw_literals([], Acc) -> Acc. - - %%% %%% Merge blocks. %%% @@ -1943,7 +1909,7 @@ merge_blocks_1([L|Ls], Preds0, Blocks0) -> Is = Is0 ++ Is1, Blk = Blk1#b_blk{is=Is}, Blocks1 = maps:remove(L, Blocks0), - Blocks2 = maps:put(P, Blk, Blocks1), + Blocks2 = Blocks1#{P:=Blk}, Successors = beam_ssa:successors(Blk), Blocks = beam_ssa:update_phi_labels(Successors, L, P, Blocks2), Preds = merge_update_preds(Successors, L, P, Preds0), @@ -1957,8 +1923,8 @@ merge_blocks_1([L|Ls], Preds0, Blocks0) -> merge_blocks_1([], _Preds, Blocks) -> Blocks. merge_update_preds([L|Ls], From, To, Preds0) -> - Ps = [rename_label(P, From, To) || P <- maps:get(L, Preds0)], - Preds = maps:put(L, Ps, Preds0), + Ps = [rename_label(P, From, To) || P <- map_get(L, Preds0)], + Preds = Preds0#{L:=Ps}, merge_update_preds(Ls, From, To, Preds); merge_update_preds([], _, _, Preds) -> Preds. @@ -1972,13 +1938,17 @@ verify_merge_is([#b_set{op=Op}|_]) -> verify_merge_is(_) -> ok. -is_merge_allowed(_, _, #b_blk{is=[#b_set{op=peek_message}|_]}) -> +is_merge_allowed(_, #b_blk{}, #b_blk{is=[#b_set{op=peek_message}|_]}) -> false; -is_merge_allowed(L, Blk0, #b_blk{}) -> - case beam_ssa:successors(Blk0) of +is_merge_allowed(L, #b_blk{last=#b_br{}}=Blk, #b_blk{}) -> + %% The predecessor block must have exactly one successor (L) for + %% the merge to be safe. + case beam_ssa:successors(Blk) of [L] -> true; [_|_] -> false - end. + end; +is_merge_allowed(_, #b_blk{last=#b_switch{}}, #b_blk{}) -> + false. %%% %%% When a tuple is matched, the pattern matching compiler generates a @@ -2001,14 +1971,22 @@ ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) -> %% Create a map with all variables that define get_tuple_element %% instructions. The variable name map to the block it is defined in. - Defs = maps:from_list(def_blocks(Linear)), + case def_blocks(Linear) of + [] -> + %% No get_tuple_element instructions, so there is nothing to do. + {St, FuncDb}; + [_|_]=Defs0 -> + Defs = maps:from_list(Defs0), + {do_ssa_opt_sink(Linear, Defs, St), FuncDb} + end. +do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) -> %% Now find all the blocks that use variables defined by get_tuple_element %% instructions. Used = used_blocks(Linear, Defs, []), %% Calculate dominators. - Dom0 = beam_ssa:dominators(Blocks0), + {Dom,Numbering} = beam_ssa:dominators(Blocks0), %% It is not safe to move get_tuple_element instructions to blocks %% that begin with certain instructions. It is also unsafe to move @@ -2016,28 +1994,18 @@ ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) -> %% unsafe moves, pretend that the unsuitable blocks are not %% dominators. Unsuitable = unsuitable(Linear, Blocks0), - Dom = case gb_sets:is_empty(Unsuitable) of - true -> - Dom0; - false -> - F = fun(_, DomBy) -> - [L || L <- DomBy, - not gb_sets:is_element(L, Unsuitable)] - end, - maps:map(F, Dom0) - end, %% Calculate new positions for get_tuple_element instructions. The new %% position is a block that dominates all uses of the variable. - DefLoc = new_def_locations(Used, Defs, Dom), + DefLoc = new_def_locations(Used, Defs, Dom, Numbering, Unsuitable), %% Now move all suitable get_tuple_element instructions to their %% new blocks. Blocks = foldl(fun({V,To}, A) -> - From = maps:get(V, Defs), + From = map_get(V, Defs), move_defs(V, From, To, A) end, Blocks0, DefLoc), - {St#st{ssa=Blocks}, FuncDb}. + St#st{ssa=Blocks}. def_blocks([{L,#b_blk{is=Is}}|Bs]) -> def_blocks_is(Is, L, def_blocks(Bs)); @@ -2104,11 +2072,11 @@ unsuitable_loop(L, Blocks, Predecessors) -> unsuitable_loop(L, Blocks, Predecessors, []). unsuitable_loop(L, Blocks, Predecessors, Acc) -> - Ps = maps:get(L, Predecessors), + Ps = map_get(L, Predecessors), unsuitable_loop_1(Ps, Blocks, Predecessors, Acc). unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) -> - case maps:get(P, Blocks) of + case map_get(P, Blocks) of #b_blk{is=[#b_set{op=peek_message}|_]} -> unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0); #b_blk{} -> @@ -2123,50 +2091,42 @@ unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) -> end; unsuitable_loop_1([], _, _, Acc) -> Acc. -%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs, Dominators) -> -%% [{Variable,NewDefinitionBlock}] -%% Calculate new locations for get_tuple_element instructions. For each -%% variable, the new location is a block that dominates all uses of -%% variable and as near to the uses of as possible. If no such block -%% distinct from the block where the instruction currently is, the -%% variable will not be included in the result list. - -new_def_locations([{V,UsedIn}|Vs], Defs, Dom) -> - DefIn = maps:get(V, Defs), - case common_dom(UsedIn, DefIn, Dom) of - [] -> - new_def_locations(Vs, Defs, Dom); - [_|_]=BetterDef -> - L = most_dominated(BetterDef, Dom), - [{V,L}|new_def_locations(Vs, Defs, Dom)] - end; -new_def_locations([], _, _) -> []. - -common_dom([L|Ls], DefIn, Dom) -> - DomBy0 = maps:get(L, Dom), - DomBy = ordsets:subtract(DomBy0, maps:get(DefIn, Dom)), - common_dom_1(Ls, Dom, DomBy). - -common_dom_1(_, _, []) -> - []; -common_dom_1([L|Ls], Dom, [_|_]=DomBy0) -> - DomBy1 = maps:get(L, Dom), - DomBy = ordsets:intersection(DomBy0, DomBy1), - common_dom_1(Ls, Dom, DomBy); -common_dom_1([], _, DomBy) -> DomBy. - -most_dominated([L|Ls], Dom) -> - most_dominated(Ls, L, maps:get(L, Dom), Dom). - -most_dominated([L|Ls], L0, DomBy, Dom) -> - case member(L, DomBy) of +%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs, +%% Dominators, Numbering, Unsuitable) -> +%% [{Variable,NewDefinitionBlock}] +%% +%% Calculate new locations for get_tuple_element instructions. For +%% each variable, the new location is a block that dominates all uses +%% of the variable and as near to the uses of as possible. + +new_def_locations([{V,UsedIn}|Vs], Defs, Dom, Numbering, Unsuitable) -> + DefIn = map_get(V, Defs), + Common = common_dominator(UsedIn, Dom, Numbering, Unsuitable), + case member(Common, map_get(DefIn, Dom)) of true -> - most_dominated(Ls, L0, DomBy, Dom); + %% The common dominator is either DefIn or an + %% ancestor of DefIn. + new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable); false -> - most_dominated(Ls, L, maps:get(L, Dom), Dom) + %% We have found a suitable descendant of DefIn, + %% to which the get_tuple_element instruction can + %% be sunk. + [{V,Common}|new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable)] end; -most_dominated([], L, _, _) -> L. +new_def_locations([], _, _, _, _) -> []. +common_dominator(Ls0, Dom, Numbering, Unsuitable) -> + [Common|_] = beam_ssa:common_dominators(Ls0, Dom, Numbering), + case gb_sets:is_member(Common, Unsuitable) of + true -> + %% It is not allowed to place the instruction here. Try + %% to find another suitable dominating block by going up + %% one step in the dominator tree. + [Common,OneUp|_] = map_get(Common, Dom), + common_dominator([OneUp], Dom, Numbering, Unsuitable); + false -> + Common + end. %% Move get_tuple_element instructions to their new locations. @@ -2206,7 +2166,6 @@ insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) -> Action0 = case Op of call -> beyond; 'catch_end' -> beyond; - set_tuple_element -> beyond; timeout -> beyond; _ -> here end, @@ -2231,6 +2190,46 @@ insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) -> insert_def_is([], _V, Def) -> [Def]. +%%% +%%% Order consecutive get_tuple_element instructions in ascending +%%% position order. This will give the loader more opportunities +%%% for combining get_tuple_element instructions. +%%% + +ssa_opt_get_tuple_element({#st{ssa=Blocks0}=St, FuncDb}) -> + Blocks = opt_get_tuple_element(maps:to_list(Blocks0), Blocks0), + {St#st{ssa=Blocks}, FuncDb}. + +opt_get_tuple_element([{L,#b_blk{is=Is0}=Blk0}|Bs], Blocks) -> + case opt_get_tuple_element_is(Is0, false, []) of + {yes,Is} -> + Blk = Blk0#b_blk{is=Is}, + opt_get_tuple_element(Bs, Blocks#{L:=Blk}); + no -> + opt_get_tuple_element(Bs, Blocks) + end; +opt_get_tuple_element([], Blocks) -> Blocks. + +opt_get_tuple_element_is([#b_set{op=get_tuple_element, + args=[#b_var{}=Src,_]}=I0|Is0], + _AnyChange, Acc) -> + {GetIs0,Is} = collect_get_tuple_element(Is0, Src, [I0]), + GetIs1 = sort([{Pos,I} || #b_set{args=[_,Pos]}=I <- GetIs0]), + GetIs = [I || {_,I} <- GetIs1], + opt_get_tuple_element_is(Is, true, reverse(GetIs, Acc)); +opt_get_tuple_element_is([I|Is], AnyChange, Acc) -> + opt_get_tuple_element_is(Is, AnyChange, [I|Acc]); +opt_get_tuple_element_is([], AnyChange, Acc) -> + case AnyChange of + true -> {yes,reverse(Acc)}; + false -> no + end. + +collect_get_tuple_element([#b_set{op=get_tuple_element, + args=[Src,_]}=I|Is], Src, Acc) -> + collect_get_tuple_element(Is, Src, [I|Acc]); +collect_get_tuple_element(Is, _Src, Acc) -> + {Acc,Is}. %%% %%% Common utilities. |