diff options
Diffstat (limited to 'lib/compiler/src/beam_ssa_opt.erl')
-rw-r--r-- | lib/compiler/src/beam_ssa_opt.erl | 212 |
1 files changed, 129 insertions, 83 deletions
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl index 32b64b393f..580abf4ed9 100644 --- a/lib/compiler/src/beam_ssa_opt.erl +++ b/lib/compiler/src/beam_ssa_opt.erl @@ -158,6 +158,7 @@ repeated_passes(Opts) -> ?PASS(ssa_opt_dead), ?PASS(ssa_opt_cse), ?PASS(ssa_opt_tail_phis), + ?PASS(ssa_opt_sink), ?PASS(ssa_opt_tuple_size), ?PASS(ssa_opt_record), ?PASS(ssa_opt_type_continue)], %Must run after ssa_opt_dead to @@ -175,8 +176,8 @@ epilogue_passes(Opts) -> ?PASS(ssa_opt_bsm), ?PASS(ssa_opt_bsm_units), ?PASS(ssa_opt_bsm_shortcut), - ?PASS(ssa_opt_blockify), ?PASS(ssa_opt_sink), + ?PASS(ssa_opt_blockify), ?PASS(ssa_opt_merge_blocks), ?PASS(ssa_opt_get_tuple_element), ?PASS(ssa_opt_trim_unreachable)], @@ -901,44 +902,52 @@ cse_suitable(#b_set{}) -> false. -record(fs, {s=undefined :: 'undefined' | 'cleared', regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()}, + vars=cerl_sets:new() :: cerl_sets:set(), fail=none :: 'none' | beam_ssa:label(), non_guards :: gb_sets:set(beam_ssa:label()), bs :: beam_ssa:block_map() }). ssa_opt_float({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) -> - NonGuards0 = float_non_guards(Linear0), - NonGuards = gb_sets:from_list(NonGuards0), + NonGuards = non_guards(Linear0), Blocks = maps:from_list(Linear0), Fs = #fs{non_guards=NonGuards,bs=Blocks}, {Linear,Count} = float_opt(Linear0, Count0, Fs), {St#st{ssa=Linear,cnt=Count}, FuncDb}. -float_blk_is_in_guard(#b_blk{last=#b_br{fail=F}}, #fs{non_guards=NonGuards}) -> - not gb_sets:is_member(F, NonGuards); -float_blk_is_in_guard(#b_blk{}, #fs{}) -> +%% The fconv instruction doesn't support jumping to a fail label, so we have to +%% skip this optimization if the fail block is a guard. +%% +%% We also skip the optimization in blocks that always fail, as it's both +%% difficult and pointless to rewrite them to use float ops. +float_can_optimize_blk(#b_blk{last=#b_br{bool=#b_var{},fail=F}}, + #fs{non_guards=NonGuards}) -> + gb_sets:is_member(F, NonGuards); +float_can_optimize_blk(#b_blk{}, #fs{}) -> false. -float_non_guards([{L,#b_blk{is=Is}}|Bs]) -> - case Is of - [#b_set{op=landingpad}|_] -> - [L|float_non_guards(Bs)]; - _ -> - float_non_guards(Bs) - end; -float_non_guards([]) -> [?BADARG_BLOCK]. - +float_opt([{L,#b_blk{is=[#b_set{op=exception_trampoline,args=[Var]}]}=Blk0} | + Bs0], Count0, Fs) -> + %% If we've replaced a BIF with float operations, we'll have a lot of extra + %% blocks that jump to the same failure block, which may have a trampoline + %% that refers to the original operation. + %% + %% Since the point of the trampoline is to keep the BIF from being removed + %% by liveness optimization, we can discard it as the liveness pass leaves + %% floats alone. + Blk = case cerl_sets:is_element(Var, Fs#fs.vars) of + true -> Blk0#b_blk{is=[]}; + false -> Blk0 + end, + {Bs, Count} = float_opt(Bs0, Count0, Fs), + {[{L,Blk}|Bs],Count}; float_opt([{L,Blk}|Bs0], Count0, Fs) -> - case float_blk_is_in_guard(Blk, Fs) of + case float_can_optimize_blk(Blk, Fs) of true -> - %% This block is inside a guard. Don't do - %% any floating point optimizations. - {Bs,Count} = float_opt(Bs0, Count0, Fs), - {[{L,Blk}|Bs],Count}; + float_opt_1(L, Blk, Bs0, Count0, Fs); false -> - %% This block is not inside a guard. - %% We can do the optimization. - float_opt_1(L, Blk, Bs0, Count0, Fs) + {Bs,Count} = float_opt(Bs0, Count0, Fs), + {[{L,Blk}|Bs],Count} end; float_opt([], Count, _Fs) -> {[],Count}. @@ -1017,12 +1026,12 @@ float_maybe_flush(Blk0, #fs{s=cleared,fail=Fail,bs=Blocks}=Fs0, Count0) -> #b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0, %% If the success block starts with a floating point operation, we can - %% defer flushing to that block as long as it isn't a guard. + %% defer flushing to that block as long as it's suitable for optimization. #b_blk{is=Is} = SuccBlk = map_get(Succ, Blocks), - SuccIsGuard = float_blk_is_in_guard(SuccBlk, Fs0), + CanOptimizeSucc = float_can_optimize_blk(SuccBlk, Fs0), case Is of - [#b_set{anno=#{float_op:=_}}|_] when not SuccIsGuard -> + [#b_set{anno=#{float_op:=_}}|_] when CanOptimizeSucc -> %% No flush needed. {[],Blk0,Fs0,Count0}; _ -> @@ -1078,21 +1087,22 @@ float_opt_is([], Fs, _Count, _Acc) -> none. float_make_op(#b_set{op={bif,Op},dst=Dst,args=As0}=I0, - Ts, #fs{s=S,regs=Rs0}=Fs, Count0) -> + Ts, #fs{s=S,regs=Rs0,vars=Vs0}=Fs, Count0) -> {As1,Rs1,Count1} = float_load(As0, Ts, Rs0, Count0, []), {As,Is0} = unzip(As1), {Fr,Count2} = new_reg('@fr', Count1), FrDst = #b_var{name=Fr}, I = I0#b_set{op={float,Op},dst=FrDst,args=As}, + Vs = cerl_sets:add_element(Dst, Vs0), Rs = Rs1#{Dst=>FrDst}, Is = append(Is0) ++ [I], case S of undefined -> {Ignore,Count} = new_reg('@ssa_ignore', Count2), C = #b_set{op={float,clearerror},dst=#b_var{name=Ignore}}, - {[C|Is],Fs#fs{s=cleared,regs=Rs},Count}; + {[C|Is],Fs#fs{s=cleared,regs=Rs,vars=Vs},Count}; cleared -> - {Is,Fs#fs{regs=Rs},Count2} + {Is,Fs#fs{regs=Rs,vars=Vs},Count2} end. float_load([A|As], [T|Ts], Rs0, Count0, Acc) -> @@ -1221,34 +1231,31 @@ live_opt_is([#b_set{op=phi,dst=Dst}=I|Is], Live, Acc) -> false -> live_opt_is(Is, Live, Acc) end; -live_opt_is([#b_set{op=succeeded,dst=SuccDst=SuccDstVar, - args=[Dst]}=SuccI, - #b_set{dst=Dst}=I|Is], Live0, Acc) -> - case gb_sets:is_member(Dst, Live0) of - true -> - Live1 = gb_sets:add(Dst, Live0), - Live = gb_sets:delete_any(SuccDst, Live1), - live_opt_is([I|Is], Live, [SuccI|Acc]); - false -> - case live_opt_unused(I) of - {replace,NewI0} -> - NewI = NewI0#b_set{dst=SuccDstVar}, - live_opt_is([NewI|Is], Live0, Acc); - keep -> - case gb_sets:is_member(SuccDst, Live0) of - true -> - Live1 = gb_sets:add(Dst, Live0), - Live = gb_sets:delete(SuccDst, Live1), - live_opt_is([I|Is], Live, [SuccI|Acc]); - false -> - live_opt_is([I|Is], Live0, Acc) - end - end +live_opt_is([#b_set{op=succeeded,dst=SuccDst,args=[MapDst]}=SuccI, + #b_set{op=get_map_element,dst=MapDst}=MapI | Is], + Live0, Acc) -> + case {gb_sets:is_member(SuccDst, Live0), + gb_sets:is_member(MapDst, Live0)} of + {true, true} -> + Live = gb_sets:delete(SuccDst, Live0), + live_opt_is([MapI | Is], Live, [SuccI | Acc]); + {true, false} -> + %% 'get_map_element' is unused; replace 'succeeded' with + %% 'has_map_field' + NewI = MapI#b_set{op=has_map_field,dst=SuccDst}, + live_opt_is([NewI | Is], Live0, Acc); + {false, true} -> + %% 'succeeded' is unused (we know it will succeed); discard it and + %% keep 'get_map_element' + live_opt_is([MapI | Is], Live0, Acc); + {false, false} -> + live_opt_is(Is, Live0, Acc) end; live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) -> case gb_sets:is_member(Dst, Live0) of true -> - Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))), + LiveUsed = gb_sets:from_ordset(beam_ssa:used(I)), + Live1 = gb_sets:union(Live0, LiveUsed), Live = gb_sets:delete(Dst, Live1), live_opt_is(Is, Live, [I|Acc]); false -> @@ -1256,17 +1263,14 @@ live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) -> true -> live_opt_is(Is, Live0, Acc); false -> - Live = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))), + LiveUsed = gb_sets:from_ordset(beam_ssa:used(I)), + Live = gb_sets:union(Live0, LiveUsed), live_opt_is(Is, Live, [I|Acc]) end end; live_opt_is([], Live, Acc) -> {Acc,Live}. -live_opt_unused(#b_set{op=get_map_element}=Set) -> - {replace,Set#b_set{op=has_map_field}}; -live_opt_unused(_) -> keep. - %%% %%% Optimize binary matching. %%% @@ -1777,35 +1781,44 @@ opt_bs_put_split_int_1(Int, L, R) -> %%% ssa_opt_tuple_size({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) -> - {Linear,Count} = opt_tup_size(Linear0, Count0, []), + %% This optimization is only safe in guards, as prefixing tuple_size with + %% an is_tuple check prevents it from throwing an exception. + NonGuards = non_guards(Linear0), + {Linear,Count} = opt_tup_size(Linear0, NonGuards, Count0, []), {St#st{ssa=Linear,cnt=Count}, FuncDb}. -opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], Count0, Acc0) -> +opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], NonGuards, Count0, Acc0) -> case {Is,Last} of {[#b_set{op={bif,'=:='},dst=Bool,args=[#b_var{}=Tup,#b_literal{val=Arity}]}], #b_br{bool=Bool}} when is_integer(Arity), Arity >= 0 -> - {Acc,Count} = opt_tup_size_1(Tup, L, Count0, Acc0), - opt_tup_size(Bs, Count, [{L,Blk}|Acc]); + {Acc,Count} = opt_tup_size_1(Tup, L, NonGuards, Count0, Acc0), + opt_tup_size(Bs, NonGuards, Count, [{L,Blk}|Acc]); {_,_} -> - opt_tup_size(Bs, Count0, [{L,Blk}|Acc0]) + opt_tup_size(Bs, NonGuards, Count0, [{L,Blk}|Acc0]) end; -opt_tup_size([], Count, Acc) -> +opt_tup_size([], _NonGuards, Count, Acc) -> {reverse(Acc),Count}. -opt_tup_size_1(Size, EqL, Count0, [{L,Blk0}|Acc]) -> - case Blk0 of - #b_blk{is=Is0,last=#b_br{bool=Bool,succ=EqL,fail=Fail}} -> - case opt_tup_size_is(Is0, Bool, Size, []) of - none -> +opt_tup_size_1(Size, EqL, NonGuards, Count0, [{L,Blk0}|Acc]) -> + #b_blk{is=Is0,last=Last} = Blk0, + case Last of + #b_br{bool=Bool,succ=EqL,fail=Fail} -> + case gb_sets:is_member(Fail, NonGuards) of + true -> {[{L,Blk0}|Acc],Count0}; - {PreIs,TupleSizeIs,Tuple} -> - opt_tup_size_2(PreIs, TupleSizeIs, L, EqL, - Tuple, Fail, Count0, Acc) + false -> + case opt_tup_size_is(Is0, Bool, Size, []) of + none -> + {[{L,Blk0}|Acc],Count0}; + {PreIs,TupleSizeIs,Tuple} -> + opt_tup_size_2(PreIs, TupleSizeIs, L, EqL, + Tuple, Fail, Count0, Acc) + end end; - #b_blk{} -> + _ -> {[{L,Blk0}|Acc],Count0} end; -opt_tup_size_1(_, _, Count, Acc) -> +opt_tup_size_1(_, _, _, Count, Acc) -> {Acc,Count}. opt_tup_size_2(PreIs, TupleSizeIs, PreL, EqL, Tuple, Fail, Count0, Acc) -> @@ -1943,12 +1956,28 @@ verify_merge_is(_) -> is_merge_allowed(_, #b_blk{}, #b_blk{is=[#b_set{op=peek_message}|_]}) -> false; -is_merge_allowed(L, #b_blk{last=#b_br{}}=Blk, #b_blk{}) -> +is_merge_allowed(_, #b_blk{}, #b_blk{is=[#b_set{op=exception_trampoline}|_]}) -> + false; +is_merge_allowed(_, #b_blk{is=[#b_set{op=exception_trampoline}|_]}, #b_blk{}) -> + false; +is_merge_allowed(L, #b_blk{last=#b_br{}}=Blk, #b_blk{is=Is}) -> %% The predecessor block must have exactly one successor (L) for %% the merge to be safe. case beam_ssa:successors(Blk) of - [L] -> true; - [_|_] -> false + [L] -> + case Is of + [#b_set{op=phi,args=[_]}|_] -> + %% The type optimizer pass must have been + %% turned off, since it would have removed this + %% redundant phi node. Refuse to merge the blocks + %% to ensure that this phi node remains at the + %% beginning of a block. + false; + _ -> + true + end; + [_|_] -> + false end; is_merge_allowed(_, #b_blk{last=#b_switch{}}, #b_blk{}) -> false. @@ -1969,9 +1998,7 @@ is_merge_allowed(_, #b_blk{last=#b_switch{}}, #b_blk{}) -> %%% extracted values. %%% -ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) -> - Linear = beam_ssa:linearize(Blocks0), - +ssa_opt_sink({#st{ssa=Linear}=St, FuncDb}) -> %% Create a map with all variables that define get_tuple_element %% instructions. The variable name map to the block it is defined in. case def_blocks(Linear) of @@ -1980,10 +2007,12 @@ ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) -> {St, FuncDb}; [_|_]=Defs0 -> Defs = maps:from_list(Defs0), - {do_ssa_opt_sink(Linear, Defs, St), FuncDb} + {do_ssa_opt_sink(Defs, St), FuncDb} end. -do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) -> +do_ssa_opt_sink(Defs, #st{ssa=Linear}=St) -> + Blocks0 = maps:from_list(Linear), + %% Now find all the blocks that use variables defined by get_tuple_element %% instructions. Used = used_blocks(Linear, Defs, []), @@ -2008,7 +2037,8 @@ do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) -> From = map_get(V, Defs), move_defs(V, From, To, A) end, Blocks0, DefLoc), - St#st{ssa=Blocks}. + + St#st{ssa=beam_ssa:linearize(Blocks)}. def_blocks([{L,#b_blk{is=Is}}|Bs]) -> def_blocks_is(Is, L, def_blocks(Bs)); @@ -2041,6 +2071,7 @@ unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs]) -> Unsuitable = case Op of bs_extract -> true; bs_put -> true; + exception_trampoline -> true; {float,_} -> true; landingpad -> true; peek_message -> true; @@ -2244,6 +2275,21 @@ gcd(A, B) -> X -> gcd(B, X) end. +non_guards(Linear) -> + gb_sets:from_list(non_guards_1(Linear)). + +non_guards_1([{L,#b_blk{is=Is}}|Bs]) -> + case Is of + [#b_set{op=exception_trampoline}|_] -> + [L | non_guards_1(Bs)]; + [#b_set{op=landingpad}|_] -> + [L | non_guards_1(Bs)]; + _ -> + non_guards_1(Bs) + end; +non_guards_1([]) -> + [?EXCEPTION_BLOCK]. + rel2fam(S0) -> S1 = sofs:relation(S0), S = sofs:rel2fam(S1), |