aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2017-12-01 10:38:20 +0100
committerBjörn Gustavsson <[email protected]>2017-12-06 10:05:47 +0100
commit2e5d6201bb044508eb8523da208caac459c8a124 (patch)
tree75b423df86dced30a80c70ec89226e904cafc08d
parent68b7cd0820362667fe77f0065a2ac06989580eb5 (diff)
downloadotp-2e5d6201bb044508eb8523da208caac459c8a124.tar.gz
otp-2e5d6201bb044508eb8523da208caac459c8a124.tar.bz2
otp-2e5d6201bb044508eb8523da208caac459c8a124.zip
v3_codegen: Avoid excessive stack frame allocation
A 'case' or 'if' that does not occur last in a function clause will always force a stack frame. The reasoning behind this is that in most uses of 'case' there will be a function call from within the 'case'. When there is a function call, the stack frame is needed both to save the continuation pointer and to save any X registers that will need to survive the call. When there is no function call from a 'case', the resulting stack frame is annoying. There will be register shuffling, and the existence of the stack frame may thwart many optimizations (for example, in beam_dead). Therefore, add an extra pass to v3_codegen to avoid creating a stack frame when not needed. https://bugs.erlang.org/browse/ERL-514
-rw-r--r--lib/compiler/src/v3_codegen.erl120
1 files changed, 117 insertions, 3 deletions
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index b222b25d7c..409adcb546 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -77,10 +77,15 @@ functions(Forms, AtomMod) ->
function(#k_fdef{anno=#k{a=Anno},func=Name,arity=Arity,
vars=As,body=Kb}, AtomMod, St0) ->
try
- %% Annotate kernel records with variable usage.
#k_match{} = Kb, %Assertion.
+
+ %% Try to suppress the stack frame unless it is
+ %% really needed.
+ Body0 = avoid_stack_frame(Kb),
+
+ %% Annotate kernel records with variable usage.
Vdb0 = init_vars(As),
- {Body,_,Vdb} = body(Kb, 1, Vdb0),
+ {Body,_,Vdb} = body(Body0, 1, Vdb0),
%% Generate the BEAM assembly code.
{Asm,EntryLabel,St} = cg_fun(Body, As, Vdb, AtomMod,
@@ -94,6 +99,112 @@ function(#k_fdef{anno=#k{a=Anno},func=Name,arity=Arity,
erlang:raise(Class, Error, Stack)
end.
+
+%% avoid_stack_frame(Kernel) -> Kernel'
+%% If possible, avoid setting up a stack frame. Functions
+%% that only do matching, calls to guard BIFs, and tail-recursive
+%% calls don't need a stack frame.
+
+avoid_stack_frame(#k_match{body=Body}=M) ->
+ try
+ M#k_match{body=avoid_stack_frame_1(Body)}
+ catch
+ impossible ->
+ M
+ end.
+
+avoid_stack_frame_1(#k_alt{first=First0,then=Then0}=Alt) ->
+ First = avoid_stack_frame_1(First0),
+ Then = avoid_stack_frame_1(Then0),
+ Alt#k_alt{first=First,then=Then};
+avoid_stack_frame_1(#k_bif{op=Op}=Bif) ->
+ case Op of
+ #k_internal{} ->
+ %% Most internal BIFs clobber the X registers.
+ throw(impossible);
+ _ ->
+ Bif
+ end;
+avoid_stack_frame_1(#k_break{anno=Anno,args=Args}) ->
+ #k_guard_break{anno=Anno,args=Args};
+avoid_stack_frame_1(#k_guard_break{}=Break) ->
+ Break;
+avoid_stack_frame_1(#k_enter{}=Enter) ->
+ %% Tail-recursive calls don't need a stack frame.
+ Enter;
+avoid_stack_frame_1(#k_guard{clauses=Cs0}=Guard) ->
+ Cs = avoid_stack_frame_list(Cs0),
+ Guard#k_guard{clauses=Cs};
+avoid_stack_frame_1(#k_guard_clause{guard=G0,body=B0}=C) ->
+ G = avoid_stack_frame_1(G0),
+ B = avoid_stack_frame_1(B0),
+ C#k_guard_clause{guard=G,body=B};
+avoid_stack_frame_1(#k_match{anno=A,vars=Vs,body=B0,ret=Ret}) ->
+ %% Use #k_guard_match{} instead to avoid saving the X registers
+ %% to the stack before matching.
+ B = avoid_stack_frame_1(B0),
+ #k_guard_match{anno=A,vars=Vs,body=B,ret=Ret};
+avoid_stack_frame_1(#k_guard_match{body=B0}=M) ->
+ B = avoid_stack_frame_1(B0),
+ M#k_guard_match{body=B};
+avoid_stack_frame_1(#k_protected{arg=Arg0}=Prot) ->
+ Arg = avoid_stack_frame_1(Arg0),
+ Prot#k_protected{arg=Arg};
+avoid_stack_frame_1(#k_put{}=Put) ->
+ Put;
+avoid_stack_frame_1(#k_return{}=Ret) ->
+ Ret;
+avoid_stack_frame_1(#k_select{var=#k_var{anno=Vanno},types=Types0}=Select) ->
+ case member(reuse_for_context, Vanno) of
+ false ->
+ Types = avoid_stack_frame_list(Types0),
+ Select#k_select{types=Types};
+ true ->
+ %% Including binary patterns that overwrite the register containing
+ %% the binary with the match context may not be safe. For example,
+ %% bs_match_SUITE:bin_tail_e/1 with inlining will be rejected by
+ %% beam_validator.
+ %%
+ %% Essentially the following code is produced:
+ %%
+ %% bs_match {x,0} => {x,0}
+ %% ...
+ %% bs_match {x,0} => {x,1} %% ILLEGAL
+ %%
+ %% A bs_match instruction will only accept a match context as the
+ %% source operand if the source and destination registers are the
+ %% the same (as in the first bs_match instruction above).
+ %% The second bs_match instruction is therefore illegal.
+ %%
+ %% This situation is avoided if there is a stack frame:
+ %%
+ %% move {x,0} => {y,0}
+ %% bs_match {x,0} => {x,0}
+ %% ...
+ %% bs_match {y,0} => {x,1} %% LEGAL
+ %%
+ throw(impossible)
+ end;
+avoid_stack_frame_1(#k_seq{arg=A0,body=B0}=Seq) ->
+ A = avoid_stack_frame_1(A0),
+ B = avoid_stack_frame_1(B0),
+ Seq#k_seq{arg=A,body=B};
+avoid_stack_frame_1(#k_test{}=Test) ->
+ Test;
+avoid_stack_frame_1(#k_type_clause{values=Values0}=TC) ->
+ Values = avoid_stack_frame_list(Values0),
+ TC#k_type_clause{values=Values};
+avoid_stack_frame_1(#k_val_clause{body=B0}=VC) ->
+ B = avoid_stack_frame_1(B0),
+ VC#k_val_clause{body=B};
+avoid_stack_frame_1(_Body) ->
+ throw(impossible).
+
+avoid_stack_frame_list([H|T]) ->
+ [avoid_stack_frame_1(H)|avoid_stack_frame_list(T)];
+avoid_stack_frame_list([]) -> [].
+
+
%% This pass creates beam format annotated with variable lifetime
%% information. Each thing is given an index and for each variable we
%% store the first and last index for its occurrence. The variable
@@ -487,7 +598,10 @@ match_cg(M, Rs, Le, Vdb, Bef, St0) ->
guard_match_cg(M, Rs, Le, Vdb, Bef, St0) ->
I = Le#l.i,
{B,St1} = new_label(St0),
- #cg{bfail=Fail} = St1,
+ Fail = case St0 of
+ #cg{bfail=0,ultimate_failure=Fail0} -> Fail0;
+ #cg{bfail=Fail0} -> Fail0
+ end,
{Mis,Aft,St2} = match_cg(M, Fail, Bef, St1#cg{break=B}),
%% Update the register descriptors for the return registers.
Reg = guard_match_regs(Aft#sr.reg, Rs),