From 693cda7f01137eda93362a74abd6028c41d2f5c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 14 Nov 2018 06:48:54 +0100 Subject: beam_ssa_type: Speed up type analysis for huge functions The type analysis pass (`beam_ssa_type`) keeps the type information for all variables that are in scope. For huge functions, the `join_types/2` function could get really slow when joining two maps with thousands of variables in each. Use a conservative approach to discard type information for variables only used once by a `br` or `switch` in the same block as the variable is defined in. This approach reduces the runtime for the `beam_ssa_type` pass from a few minutes down to few seconds for this module: https://github.com/aggelgian/cuter/blob/master/src/cuter_binlib.erl Rejected approach: Calculate liveness information for all variables and discard type information for any variable that would not be used again. That turned out to not work because some optimizations would invalidate the liveness (in particular, substitutions could lengthen the lifetime for a variable). Trying to update the liveness information when doing the optimizations would be tricky. Noticed-by: Kostis Sagonas --- lib/compiler/src/beam_ssa_type.erl | 103 ++++++++++++++++++++++++++++++++----- 1 file changed, 90 insertions(+), 13 deletions(-) diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl index 18e6e73a46..95fc3bb0e9 100644 --- a/lib/compiler/src/beam_ssa_type.erl +++ b/lib/compiler/src/beam_ssa_type.erl @@ -27,9 +27,10 @@ -define(UNICODE_INT, #t_integer{elements={0,16#10FFFF}}). --record(d, {ds :: #{beam_ssa:var_name():=beam_ssa:b_set()}, +-record(d, {ds :: #{beam_ssa:b_var():=beam_ssa:b_set()}, ls :: #{beam_ssa:label():=type_db()}, - sub :: #{beam_ssa:var_name():=beam_ssa:value()} + once :: cerl_sets:set(beam_ssa:b_var()), + sub :: #{beam_ssa:b_var():=beam_ssa:value()} }). -define(ATOM_SET_SIZE, 5). @@ -56,13 +57,15 @@ Block :: beam_ssa:b_blk(). opt(Linear, Args) -> + UsedOnce = used_once(Linear, Args), Ts = maps:from_list([{V,any} || #b_var{}=V <- Args]), FakeCall = #b_set{op=call,args=[#b_remote{mod=#b_literal{val=unknown}, name=#b_literal{val=unknown}, arity=0}]}, Defs = maps:from_list([{Var,FakeCall#b_set{dst=Var}} || #b_var{}=Var <- Args]), - D = #d{ds=Defs,ls=#{0=>Ts},sub=#{}}, + D = #d{ds=Defs,ls=#{0=>Ts,?BADARG_BLOCK=>#{}}, + once=UsedOnce,sub=#{}}, opt_1(Linear, D). opt_1([{L,Blk}|Bs], #d{ls=Ls}=D) -> @@ -425,16 +428,43 @@ opt_terminator(#b_ret{}=Ret, _Ts, _Ds) -> Ret. update_successors(#b_br{bool=#b_literal{val=true},succ=S}, Ts, D) -> update_successor(S, Ts, D); -update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts, D0) -> - D = update_successor_bool(Bool, false, Fail, Ts, D0), - SuccTs = infer_types(Bool, Ts, D0), - update_successor_bool(Bool, true, Succ, SuccTs, D); -update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts, D0) -> - D = update_successor(Fail, Ts, D0), - foldl(fun({Val,S}, A) -> - T = get_type(Val, Ts), - update_successor(S, Ts#{V=>T}, A) - end, D, List); +update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts0, D0) -> + case cerl_sets:is_element(Bool, D0#d.once) of + true -> + %% This variable is defined in this block and is only + %% referenced by this br terminator. Therefore, there is + %% no need to include the type database passed on to the + %% successors of this block. + Ts = maps:remove(Bool, Ts0), + D = update_successor(Fail, Ts, D0), + SuccTs = infer_types(Bool, Ts, D0), + update_successor(Succ, SuccTs, D); + false -> + D = update_successor_bool(Bool, false, Fail, Ts0, D0), + SuccTs = infer_types(Bool, Ts0, D0), + update_successor_bool(Bool, true, Succ, SuccTs, D) + end; +update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts0, D0) -> + case cerl_sets:is_element(V, D0#d.once) of + true -> + %% This variable is defined in this block and is only + %% referenced by this switch terminator. Therefore, there is + %% no need to include the type database passed on to the + %% successors of this block. + Ts = maps:remove(V, Ts0), + D = update_successor(Fail, Ts, D0), + F = fun({_Val,S}, A) -> + update_successor(S, Ts, A) + end, + foldl(F, D, List); + false -> + D = update_successor(Fail, Ts0, D0), + F = fun({Val,S}, A) -> + T = get_type(Val, Ts0), + update_successor(S, Ts0#{V=>T}, A) + end, + foldl(F, D, List) + end; update_successors(#b_ret{}, _Ts, D) -> D. update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) -> @@ -447,6 +477,11 @@ update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) -> update_successor(S, Ts, D) end. +update_successor(?BADARG_BLOCK, _Ts, #d{}=D) -> + %% We KNOW that no variables are used in the ?BADARG_BLOCK, + %% so there is no need to update the type information. That + %% can be a huge timesaver for huge functions. + D; update_successor(S, Ts0, #d{ls=Ls}=D) -> case Ls of #{S:=Ts1} -> @@ -766,6 +801,48 @@ simplify_not(#b_br{bool=#b_var{}=V,succ=Succ,fail=Fail}=Br0, Ts, Ds) -> Br0 end. +%%% +%%% Calculate the set of variables that are only used once in the +%%% block that they are defined in. That will allow us to discard type +%%% information for variables that will never be referenced by the +%%% successor blocks, potentially improving compilation times. +%%% + +used_once(Linear, Args) -> + Map0 = used_once_1(reverse(Linear), #{}), + Map = maps:without(Args, Map0), + cerl_sets:from_list(maps:keys(Map)). + +used_once_1([{L,#b_blk{is=Is,last=Last}}|Bs], Uses0) -> + Uses = used_once_2([Last|reverse(Is)], L, Uses0), + used_once_1(Bs, Uses); +used_once_1([], Uses) -> Uses. + +used_once_2([I|Is], L, Uses0) -> + Uses = used_once_uses(beam_ssa:used(I), L, Uses0), + case I of + #b_set{dst=Dst} -> + case Uses of + #{Dst:=[L]} -> + used_once_2(Is, L, Uses); + #{} -> + used_once_2(Is, L, maps:remove(Dst, Uses)) + end; + _ -> + used_once_2(Is, L, Uses) + end; +used_once_2([], _, Uses) -> Uses. + +used_once_uses([V|Vs], L, Uses) -> + case Uses of + #{V:=Us} -> + used_once_uses(Vs, L, Uses#{V:=[L|Us]}); + #{} -> + used_once_uses(Vs, L, Uses#{V=>[L]}) + end; +used_once_uses([], _, Uses) -> Uses. + + get_types(Values, Ts) -> [get_type(Val, Ts) || Val <- Values]. -spec get_type(beam_ssa:value(), type_db()) -> type(). -- cgit v1.2.3 From 64a15f2979c4644298cdad1319919fe84103484e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 15 Nov 2018 03:22:53 +0100 Subject: beam_ssa_pre_codegen: Eliminate a bottleneck in linear scan The linear scan algorithm keeps unhandled intervals in two separate lists: one for intervals with reserved registers and one for intervals without reserved registers. When collecting the available free registers all unhandled intervals with reserved registers must be checked for overlap. Unhandled intervals that had a preferred register were put into the list of intervals with reserved registers, leading to a lot of unecessary overlap checking if there were many intervals with preferred registers. Changing the partition code to put intervals with preferred registers into the general list of unhandled intervals will reduce the compilation time if there are many preferred registers. On my computer, this change reduced the time of the linear scan pass from about 20 seconds down to about 0.5 seconds for this module: https://github.com/aggelgian/cuter/blob/master/src/cuter_binlib.erl Noticed-by: Kostis Sagonas --- lib/compiler/src/beam_ssa_pre_codegen.erl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index 9175931375..c60c6da9ea 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -2211,7 +2211,13 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) -> Free = init_free(maps:to_list(Res)), Intervals1 = [init_interval(Int, Res) || Int <- Intervals0], Intervals = sort(Intervals1), - IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end, + IsReserved = fun(#i{reg=Reg}) -> + case Reg of + none -> false; + {prefer,{_,_}} -> false; + {_,_} -> true + end + end, {UnhandledRes,Unhandled} = partition(IsReserved, Intervals), L = #l{unhandled_res=UnhandledRes, unhandled_any=Unhandled,free=Free}, -- cgit v1.2.3 From 4534b73552bdb405808154f01f9b051cacb4ff3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 15 Nov 2018 10:21:11 +0100 Subject: beam_ssa_dead: Speed up optimization of switch instructions `beam_ssa_dead` can waste a lot of time trying to optimize an unoptimizable `switch` instruction. By being a little bit smarter when optimizing `switch` instructions, the runtime for the beam_ssa_dead pass was reduced approximately by half on my computer for this module: https://github.com/aggelgian/cuter/blob/master/src/cuter_binlib.erl Noticed-by: Kostis Sagonas --- lib/compiler/src/beam_ssa_dead.erl | 59 ++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/lib/compiler/src/beam_ssa_dead.erl b/lib/compiler/src/beam_ssa_dead.erl index c20652580d..067d9a6741 100644 --- a/lib/compiler/src/beam_ssa_dead.erl +++ b/lib/compiler/src/beam_ssa_dead.erl @@ -135,7 +135,8 @@ shortcut_terminator(Last, _Is, _Bs, _St) -> Last. shortcut_switch([{Lit,L0}|T], Bool, Bs, St0) -> - St = St0#st{rel_op=normalize_op({bif,'=:='}, [Bool,Lit])}, + RelOp = {'=:=',Bool,Lit}, + St = St0#st{rel_op=RelOp}, #b_br{bool=#b_literal{val=true},succ=L} = shortcut(L0, bind_var(Bool, Lit, Bs), St#st{target=one_way}), [{Lit,L}|shortcut_switch(T, Bool, Bs, St0)]; @@ -388,41 +389,43 @@ eval_terminator(#b_switch{arg=Arg,fail=Fail,list=List}=Sw, Bs, St) -> %% Literal argument. Simplify to a `br`. beam_ssa:normalize(Sw#b_switch{arg=Val}); #b_var{} -> - case St of - #st{rel_op=none} -> - %% No previous relational operator is stored. - %% Give up. + %% Try optimizing the switch. + case eval_switch(List, Arg, St, Fail) of + none -> none; - #st{} -> - %% There is a previous relational operator stored. - %% Try optimizing the switch. - case eval_switch(List, Arg, St, Fail) of - none -> - none; - To when is_integer(To) -> - %% Either one of the values in the switch - %% matched a previous value in a '=:=' test, or - %% none of the values matched a previous test. - #b_br{bool=#b_literal{val=true},succ=To,fail=To} - end + To when is_integer(To) -> + %% Either one of the values in the switch + %% matched a previous value in a '=:=' test, or + %% none of the values matched a previous test. + #b_br{bool=#b_literal{val=true},succ=To,fail=To} end end; eval_terminator(#b_ret{}, _Bs, _St) -> none. -eval_switch([{Lit,Lbl}|T], Arg, St, Fail) -> - case eval_rel_op({bif,'=:='}, [Arg,Lit], St) of - none -> - %% This label could be reached. - eval_switch(T, Arg, St, none); - #b_literal{val=false} -> - %% This branch will never be taken. - eval_switch(T, Arg, St, Fail); - #b_literal{val=true} -> +eval_switch(List, Arg, #st{rel_op={_,Arg,_}=PrevOp}, Fail) -> + %% There is a previous relational operator testing the same variable. + %% Optimization may be possible. + eval_switch_1(List, Arg, PrevOp, Fail); +eval_switch(_, _, _, _) -> + %% There is either no previous relational operator, or it tests + %% a different variable. Nothing to optimize. + none. + +eval_switch_1([{Lit,Lbl}|T], Arg, PrevOp, Fail) -> + RelOp = {'=:=',Arg,Lit}, + case will_succeed(PrevOp, RelOp) of + yes -> %% Success. This branch will always be taken. - Lbl + Lbl; + no -> + %% This branch will never be taken. + eval_switch_1(T, Arg, PrevOp, Fail); + maybe -> + %% This label could be reached. + eval_switch_1(T, Arg, PrevOp, none) end; -eval_switch([], _Arg, _St, Fail) -> +eval_switch_1([], _Arg, _PrevOp, Fail) -> %% Fail is now either the failure label or 'none'. Fail. -- cgit v1.2.3