aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2018-01-11 09:58:29 +0100
committerGitHub <[email protected]>2018-01-11 09:58:29 +0100
commit64baf77671e6106929ea702a6a51b089e48b3409 (patch)
tree2c249e9cfefeee5febca6e0e375a121b3ab10954 /lib
parent24e83b0372c5a18d946711fccd0678a664f9635b (diff)
parentcb6fc15c35c7e588604cb9ee767b79b5e9627d46 (diff)
downloadotp-64baf77671e6106929ea702a6a51b089e48b3409.tar.gz
otp-64baf77671e6106929ea702a6a51b089e48b3409.tar.bz2
otp-64baf77671e6106929ea702a6a51b089e48b3409.zip
Merge pull request #1661 from bjorng/bjorn/compiler/opt-allocate_zero
Optimize allocation of stack frames
Diffstat (limited to 'lib')
-rw-r--r--lib/compiler/src/beam_block.erl69
-rw-r--r--lib/compiler/src/beam_type.erl30
-rw-r--r--lib/compiler/src/beam_utils.erl45
3 files changed, 87 insertions, 57 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index fe1ce6f60b..570fc05ae5 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -23,7 +23,7 @@
-module(beam_block).
-export([module/2]).
--import(lists, [reverse/1,reverse/2,foldl/3,member/2]).
+-import(lists, [reverse/1,reverse/2,member/2]).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
@@ -41,7 +41,8 @@ function({function,Name,Arity,CLabel,Is0}) ->
Is4 = move_allocates(Is3),
Is5 = beam_utils:live_opt(Is4),
Is6 = opt_blocks(Is5),
- Is = beam_utils:delete_live_annos(Is6),
+ Is7 = beam_utils:delete_live_annos(Is6),
+ Is = opt_allocs(Is7),
%% Done.
{function,Name,Arity,CLabel,Is}
@@ -143,10 +144,9 @@ opt_blocks([I|Is]) ->
opt_blocks([]) -> [].
opt_block(Is0) ->
- Is = find_fixpoint(fun(Is) ->
- opt_tuple_element(opt(Is))
- end, Is0),
- opt_alloc(Is).
+ find_fixpoint(fun(Is) ->
+ opt_tuple_element(opt(Is))
+ end, Is0).
find_fixpoint(OptFun, Is0) ->
case OptFun(Is0) of
@@ -411,31 +411,47 @@ eliminate_use_of_from_reg([I]=Is, From, _To, Acc) ->
no
end.
+%% opt_allocs(Instructions) -> Instructions. Optimize allocate
+%% instructions inside blocks. If safe, replace an allocate_zero
+%% instruction with the slightly cheaper allocate instruction.
+
+opt_allocs(Is) ->
+ D = beam_utils:index_labels(Is),
+ opt_allocs_1(Is, D).
+
+opt_allocs_1([{block,Bl0}|Is], D) ->
+ Bl = opt_alloc(Bl0, {D,Is}),
+ [{block,Bl}|opt_allocs_1(Is, D)];
+opt_allocs_1([I|Is], D) ->
+ [I|opt_allocs_1(Is, D)];
+opt_allocs_1([], _) -> [].
+
%% opt_alloc(Instructions) -> Instructions'
%% Optimises all allocate instructions.
opt_alloc([{set,[],[],{alloc,Live0,Info0}},
- {set,[],[],{alloc,Live,Info}}|Is]) ->
+ {set,[],[],{alloc,Live,Info}}|Is], D) ->
Live = Live0, %Assertion.
Alloc = combine_alloc(Info0, Info),
I = {set,[],[],{alloc,Live,Alloc}},
- opt_alloc([I|Is]);
-opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) ->
- [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|Is];
-opt_alloc([I|Is]) -> [I|opt_alloc(Is)];
-opt_alloc([]) -> [].
+ opt_alloc([I|Is], D);
+opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is], D) ->
+ [{set,[],[],opt_alloc(Is, D, Ns, Nh, R)}|Is];
+opt_alloc([I|Is], D) -> [I|opt_alloc(Is, D)];
+opt_alloc([], _) -> [].
combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]}) ->
{zero,Ns,beam_utils:combine_heap_needs(Nh1, Nh2),Init}.
-
+
%% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> [Instr]
%% Generates the optimal sequence of instructions for
%% allocating and initalizing the stack frame and needed heap.
-opt_alloc(_Is, nostack, Nh, LivingRegs) ->
+opt_alloc(_Is, _D, nostack, Nh, LivingRegs) ->
{alloc,LivingRegs,{nozero,nostack,Nh,[]}};
-opt_alloc(Is, Ns, Nh, LivingRegs) ->
- InitRegs = init_yreg(Is, 0),
+opt_alloc(Bl, {D,OuterIs}, Ns, Nh, LivingRegs) ->
+ Is = [{block,Bl}|OuterIs],
+ InitRegs = init_yregs(Ns, Is, D),
case count_ones(InitRegs) of
N when N*2 > Ns ->
{alloc,LivingRegs,{nozero,Ns,Nh,gen_init(Ns, InitRegs)}};
@@ -451,19 +467,14 @@ gen_init(Fs, Regs, Y, Acc) when Regs band 1 =:= 0 ->
gen_init(Fs, Regs, Y, Acc) ->
gen_init(Fs, Regs bsr 1, Y+1, Acc).
-%% init_yreg(Instructions, RegSet) -> RegSetInitialized
-%% Calculate the set of initialized y registers.
-
-init_yreg([{set,_,_,{bif,_,_}}|_], Reg) -> Reg;
-init_yreg([{set,_,_,{alloc,_,{gc_bif,_,_}}}|_], Reg) -> Reg;
-init_yreg([{set,_,_,{alloc,_,{put_map,_,_}}}|_], Reg) -> Reg;
-init_yreg([{set,Ds,_,_}|Is], Reg) -> init_yreg(Is, add_yregs(Ds, Reg));
-init_yreg(_Is, Reg) -> Reg.
-
-add_yregs(Ys, Reg) -> foldl(fun(Y, R0) -> add_yreg(Y, R0) end, Reg, Ys).
-
-add_yreg({y,Y}, Reg) -> Reg bor (1 bsl Y);
-add_yreg(_, Reg) -> Reg.
+init_yregs(Y, Is, D) when Y >= 0 ->
+ case beam_utils:is_killed({y,Y}, Is, D) of
+ true ->
+ (1 bsl Y) bor init_yregs(Y-1, Is, D);
+ false ->
+ init_yregs(Y-1, Is, D)
+ end;
+init_yregs(_, _, _) -> 0.
count_ones(Bits) -> count_ones(Bits, 0).
count_ones(0, Acc) -> Acc;
diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl
index d1f1076e17..e8cca4a0c0 100644
--- a/lib/compiler/src/beam_type.erl
+++ b/lib/compiler/src/beam_type.erl
@@ -333,29 +333,17 @@ flt_need_heap_2({set,_,_,{put_tuple,_}}, H, Fl) ->
{[],H+1,Fl};
flt_need_heap_2({set,_,_,put}, H, Fl) ->
{[],H+1,Fl};
-%% Then the "neutral" instructions. We just pass them.
-flt_need_heap_2({set,[{fr,_}],_,_}, H, Fl) ->
- {[],H,Fl};
-flt_need_heap_2({set,[],[],fclearerror}, H, Fl) ->
- {[],H,Fl};
-flt_need_heap_2({set,[],[],fcheckerror}, H, Fl) ->
- {[],H,Fl};
-flt_need_heap_2({set,_,_,{bif,_,_}}, H, Fl) ->
- {[],H,Fl};
-flt_need_heap_2({set,_,_,move}, H, Fl) ->
- {[],H,Fl};
-flt_need_heap_2({set,_,_,{get_tuple_element,_}}, H, Fl) ->
- {[],H,Fl};
-flt_need_heap_2({set,_,_,get_list}, H, Fl) ->
- {[],H,Fl};
-flt_need_heap_2({set,_,_,{try_catch,_,_}}, H, Fl) ->
- {[],H,Fl};
-flt_need_heap_2({set,_,_,init}, H, Fl) ->
- {[],H,Fl};
-%% All other instructions should cause the insertion of an allocation
+%% The following instructions cause the insertion of an allocation
%% instruction if needed.
+flt_need_heap_2({set,_,_,{alloc,_,_}}, H, Fl) ->
+ {flt_alloc(H, Fl),0,0};
+flt_need_heap_2({set,_,_,{set_tuple_element,_}}, H, Fl) ->
+ {flt_alloc(H, Fl),0,0};
+flt_need_heap_2({'%live',_,_}, H, Fl) ->
+ {flt_alloc(H, Fl),0,0};
+%% All other instructions are "neutral". We just pass them.
flt_need_heap_2(_, H, Fl) ->
- {flt_alloc(H, Fl),0,0}.
+ {[],H,Fl}.
flt_alloc(0, 0) ->
[];
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index 60221578bd..a2795e625f 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -118,6 +118,7 @@ is_killed(R, Is, D) ->
St = #live{lbl=D,res=gb_trees:empty()},
case check_liveness(R, Is, St) of
{killed,_} -> true;
+ {exit_not_used,_} -> true;
{_,_} -> false
end.
@@ -130,6 +131,7 @@ is_killed_at(R, Lbl, D) when is_integer(Lbl) ->
St0 = #live{lbl=D,res=gb_trees:empty()},
case check_liveness_at(R, Lbl, St0) of
{killed,_} -> true;
+ {exit_not_used,_} -> true;
{_,_} -> false
end.
@@ -146,6 +148,7 @@ is_not_used(R, Is, D) ->
St = #live{lbl=D,res=gb_trees:empty()},
case check_liveness(R, Is, St) of
{used,_} -> false;
+ {exit_not_used,_} -> false;
{_,_} -> true
end.
@@ -309,7 +312,7 @@ delete_live_annos([]) -> [].
combine_heap_needs(H1, H2) when is_integer(H1), is_integer(H2) ->
H1 + H2;
combine_heap_needs(H1, H2) ->
- combine_alloc_lists([H1,H2]).
+ {alloc,combine_alloc_lists([H1,H2])}.
%% anno_defs(Instructions) -> Instructions'
@@ -347,6 +350,9 @@ split_even(Rs) -> split_even(Rs, [], []).
%%
%% killed - Reg is assigned or killed by an allocation instruction.
%% not_used - the value of Reg is not used, but Reg must not be garbage
+%% exit_not_used - the value of Reg is not used, but must not be garbage
+%% because the stack will be scanned because an
+%% exit BIF will raise an exception
%% used - Reg is used
check_liveness(R, [{block,Blk}|Is], St0) ->
@@ -370,6 +376,8 @@ check_liveness(R, [{test,_,{f,Fail},As}|Is], St0) ->
case check_liveness_at(R, Fail, St0) of
{killed,St1} ->
check_liveness(R, Is, St1);
+ {exit_not_used,St1} ->
+ check_liveness(R, Is, St1);
{not_used,St1} ->
not_used(check_liveness(R, Is, St1));
{used,_}=Used ->
@@ -432,7 +440,7 @@ check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) ->
false ->
if
R =:= Dst -> {killed,St};
- true -> check_liveness(R, Is, St)
+ true -> not_used(check_liveness(R, Is, St))
end
end
end;
@@ -466,7 +474,7 @@ check_liveness(R, [{call_ext,Live,_}=I|Is], St) ->
%% We must make sure we don't check beyond this
%% instruction or we will fall through into random
%% unrelated code and get stuck in a loop.
- {killed,St}
+ {exit_not_used,St}
end
end;
check_liveness(R, [{call_fun,Live}|Is], St) ->
@@ -631,6 +639,7 @@ check_liveness_at(R, Lbl, #live{lbl=Ll,res=ResMemorized}=St0) ->
{Res,St#live{res=gb_trees:insert(Lbl, Res, St#live.res)}}
end.
+not_used({exit_not_used,St}) -> {not_used,St};
not_used({killed,St}) -> {not_used,St};
not_used({_,_}=Res) -> Res.
@@ -659,13 +668,30 @@ check_liveness_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], St0) ->
{killed,St0};
true ->
case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
- {killed,St} -> {not_used,St};
{transparent,St} -> {alloc_used,St};
- {_,_}=Res -> Res
+ {_,_}=Res -> not_used(Res)
end
end;
-check_liveness_block({y,_}=R, [{set,Ds,Ss,{alloc,_Live,Op}}|Is], St) ->
- check_liveness_block_1(R, Ss, Ds, Op, Is, St);
+check_liveness_block({y,_}=R, [{set,Ds,Ss,{alloc,_Live,Op}}|Is], St0) ->
+ case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
+ {transparent,St} -> {alloc_used,St};
+ {_,_}=Res -> not_used(Res)
+ end;
+check_liveness_block({y,_}=R, [{set,Ds,Ss,{try_catch,_,Op}}|Is], St0) ->
+ case Ds of
+ [R] ->
+ {killed,St0};
+ _ ->
+ case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
+ {exit_not_used,St} ->
+ {used,St};
+ {transparent,St} ->
+ %% Conservatively assumed that it is used.
+ {used,St};
+ {_,_}=Res ->
+ Res
+ end
+ end;
check_liveness_block(R, [{set,Ds,Ss,Op}|Is], St) ->
check_liveness_block_1(R, Ss, Ds, Op, Is, St);
check_liveness_block(_, [], St) -> {transparent,St}.
@@ -681,6 +707,11 @@ check_liveness_block_1(R, Ss, Ds, Op, Is, St0) ->
true -> {killed,St};
false -> check_liveness_block(R, Is, St)
end;
+ {exit_not_used,St} ->
+ case member(R, Ds) of
+ true -> {exit_not_used,St};
+ false -> check_liveness_block(R, Is, St)
+ end;
{not_used,St} ->
not_used(case member(R, Ds) of
true -> {killed,St};