diff options
author | Björn Gustavsson <[email protected]> | 2017-12-01 10:38:20 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2017-12-06 10:05:47 +0100 |
commit | 2e5d6201bb044508eb8523da208caac459c8a124 (patch) | |
tree | 75b423df86dced30a80c70ec89226e904cafc08d | |
parent | 68b7cd0820362667fe77f0065a2ac06989580eb5 (diff) | |
download | otp-2e5d6201bb044508eb8523da208caac459c8a124.tar.gz otp-2e5d6201bb044508eb8523da208caac459c8a124.tar.bz2 otp-2e5d6201bb044508eb8523da208caac459c8a124.zip |
v3_codegen: Avoid excessive stack frame allocation
A 'case' or 'if' that does not occur last in a function clause will
always force a stack frame. The reasoning behind this is that in most
uses of 'case' there will be a function call from within the
'case'. When there is a function call, the stack frame is needed both
to save the continuation pointer and to save any X registers that will
need to survive the call.
When there is no function call from within a 'case', the resulting
stack frame is pure overhead. It causes extra register shuffling, and
the existence of the stack frame may thwart many optimizations (for
example, in beam_dead).
Therefore, add an extra pass to v3_codegen to avoid creating a
stack frame when not needed.
https://bugs.erlang.org/browse/ERL-514
-rw-r--r-- | lib/compiler/src/v3_codegen.erl | 120 |
1 file changed, 117 insertions, 3 deletions
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl index b222b25d7c..409adcb546 100644 --- a/lib/compiler/src/v3_codegen.erl +++ b/lib/compiler/src/v3_codegen.erl @@ -77,10 +77,15 @@ functions(Forms, AtomMod) -> function(#k_fdef{anno=#k{a=Anno},func=Name,arity=Arity, vars=As,body=Kb}, AtomMod, St0) -> try - %% Annotate kernel records with variable usage. #k_match{} = Kb, %Assertion. + + %% Try to suppress the stack frame unless it is + %% really needed. + Body0 = avoid_stack_frame(Kb), + + %% Annotate kernel records with variable usage. Vdb0 = init_vars(As), - {Body,_,Vdb} = body(Kb, 1, Vdb0), + {Body,_,Vdb} = body(Body0, 1, Vdb0), %% Generate the BEAM assembly code. {Asm,EntryLabel,St} = cg_fun(Body, As, Vdb, AtomMod, @@ -94,6 +99,112 @@ function(#k_fdef{anno=#k{a=Anno},func=Name,arity=Arity, erlang:raise(Class, Error, Stack) end. + +%% avoid_stack_frame(Kernel) -> Kernel' +%% If possible, avoid setting up a stack frame. Functions +%% that only do matching, calls to guard BIFs, and tail-recursive +%% calls don't need a stack frame. + +avoid_stack_frame(#k_match{body=Body}=M) -> + try + M#k_match{body=avoid_stack_frame_1(Body)} + catch + impossible -> + M + end. + +avoid_stack_frame_1(#k_alt{first=First0,then=Then0}=Alt) -> + First = avoid_stack_frame_1(First0), + Then = avoid_stack_frame_1(Then0), + Alt#k_alt{first=First,then=Then}; +avoid_stack_frame_1(#k_bif{op=Op}=Bif) -> + case Op of + #k_internal{} -> + %% Most internal BIFs clobber the X registers. + throw(impossible); + _ -> + Bif + end; +avoid_stack_frame_1(#k_break{anno=Anno,args=Args}) -> + #k_guard_break{anno=Anno,args=Args}; +avoid_stack_frame_1(#k_guard_break{}=Break) -> + Break; +avoid_stack_frame_1(#k_enter{}=Enter) -> + %% Tail-recursive calls don't need a stack frame. 
+ Enter; +avoid_stack_frame_1(#k_guard{clauses=Cs0}=Guard) -> + Cs = avoid_stack_frame_list(Cs0), + Guard#k_guard{clauses=Cs}; +avoid_stack_frame_1(#k_guard_clause{guard=G0,body=B0}=C) -> + G = avoid_stack_frame_1(G0), + B = avoid_stack_frame_1(B0), + C#k_guard_clause{guard=G,body=B}; +avoid_stack_frame_1(#k_match{anno=A,vars=Vs,body=B0,ret=Ret}) -> + %% Use #k_guard_match{} instead to avoid saving the X registers + %% to the stack before matching. + B = avoid_stack_frame_1(B0), + #k_guard_match{anno=A,vars=Vs,body=B,ret=Ret}; +avoid_stack_frame_1(#k_guard_match{body=B0}=M) -> + B = avoid_stack_frame_1(B0), + M#k_guard_match{body=B}; +avoid_stack_frame_1(#k_protected{arg=Arg0}=Prot) -> + Arg = avoid_stack_frame_1(Arg0), + Prot#k_protected{arg=Arg}; +avoid_stack_frame_1(#k_put{}=Put) -> + Put; +avoid_stack_frame_1(#k_return{}=Ret) -> + Ret; +avoid_stack_frame_1(#k_select{var=#k_var{anno=Vanno},types=Types0}=Select) -> + case member(reuse_for_context, Vanno) of + false -> + Types = avoid_stack_frame_list(Types0), + Select#k_select{types=Types}; + true -> + %% Including binary patterns that overwrite the register containing + %% the binary with the match context may not be safe. For example, + %% bs_match_SUITE:bin_tail_e/1 with inlining will be rejected by + %% beam_validator. + %% + %% Essentially the following code is produced: + %% + %% bs_match {x,0} => {x,0} + %% ... + %% bs_match {x,0} => {x,1} %% ILLEGAL + %% + %% A bs_match instruction will only accept a match context as the + %% source operand if the source and destination registers are the + %% the same (as in the first bs_match instruction above). + %% The second bs_match instruction is therefore illegal. + %% + %% This situation is avoided if there is a stack frame: + %% + %% move {x,0} => {y,0} + %% bs_match {x,0} => {x,0} + %% ... 
+ %% bs_match {y,0} => {x,1} %% LEGAL + %% + throw(impossible) + end; +avoid_stack_frame_1(#k_seq{arg=A0,body=B0}=Seq) -> + A = avoid_stack_frame_1(A0), + B = avoid_stack_frame_1(B0), + Seq#k_seq{arg=A,body=B}; +avoid_stack_frame_1(#k_test{}=Test) -> + Test; +avoid_stack_frame_1(#k_type_clause{values=Values0}=TC) -> + Values = avoid_stack_frame_list(Values0), + TC#k_type_clause{values=Values}; +avoid_stack_frame_1(#k_val_clause{body=B0}=VC) -> + B = avoid_stack_frame_1(B0), + VC#k_val_clause{body=B}; +avoid_stack_frame_1(_Body) -> + throw(impossible). + +avoid_stack_frame_list([H|T]) -> + [avoid_stack_frame_1(H)|avoid_stack_frame_list(T)]; +avoid_stack_frame_list([]) -> []. + + %% This pass creates beam format annotated with variable lifetime %% information. Each thing is given an index and for each variable we %% store the first and last index for its occurrence. The variable @@ -487,7 +598,10 @@ match_cg(M, Rs, Le, Vdb, Bef, St0) -> guard_match_cg(M, Rs, Le, Vdb, Bef, St0) -> I = Le#l.i, {B,St1} = new_label(St0), - #cg{bfail=Fail} = St1, + Fail = case St0 of + #cg{bfail=0,ultimate_failure=Fail0} -> Fail0; + #cg{bfail=Fail0} -> Fail0 + end, {Mis,Aft,St2} = match_cg(M, Fail, Bef, St1#cg{break=B}), %% Update the register descriptors for the return registers. Reg = guard_match_regs(Aft#sr.reg, Rs), |