aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2017-12-07 10:07:25 +0100
committerGitHub <[email protected]>2017-12-07 10:07:25 +0100
commit71bb60815630574bf710b8ce8a7d3b0e48b5a985 (patch)
treedd8bb2e46c8e82c6190233f2c84a8bbe7471466b
parentbe9f93b14e0fb9ff09db36abde62ae8099bf5bd0 (diff)
parentf33b4f5cf611472c631c30e8d6bbde83f2fb2058 (diff)
downloadotp-71bb60815630574bf710b8ce8a7d3b0e48b5a985.tar.gz
otp-71bb60815630574bf710b8ce8a7d3b0e48b5a985.tar.bz2
otp-71bb60815630574bf710b8ce8a7d3b0e48b5a985.zip
Merge pull request #1652 from bjorng/bjorn/compiler/fix-excessive-allocations/ERL-514
Avoid excessive stack frame allocation OTP-14808
-rw-r--r--lib/compiler/src/beam_dead.erl20
-rw-r--r--lib/compiler/src/core_lint.erl6
-rw-r--r--lib/compiler/src/sys_core_fold.erl10
-rw-r--r--lib/compiler/src/v3_codegen.erl120
-rw-r--r--lib/compiler/src/v3_core.erl4
-rw-r--r--lib/compiler/src/v3_kernel.erl17
-rw-r--r--lib/compiler/test/match_SUITE.erl74
-rw-r--r--lib/compiler/test/receive_SUITE.erl8
8 files changed, 236 insertions, 23 deletions
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl
index d379fdc4eb..1b152a2d6f 100644
--- a/lib/compiler/src/beam_dead.erl
+++ b/lib/compiler/src/beam_dead.erl
@@ -272,7 +272,8 @@ backward([{jump,{f,To0}},{move,Src,Reg}=Move|Is], D, Acc) ->
end;
backward([{jump,{f,To}}=J|[{bif,Op,{f,BifFail},Ops,Reg}|Is]=Is0], D, Acc) ->
try replace_comp_op(To, Reg, Op, Ops, D) of
- I -> backward(Is, D, I++Acc)
+ {Test,Jump} ->
+ backward([Jump,Test|Is], D, Acc)
catch
throw:not_possible ->
case To =:= BifFail of
@@ -446,7 +447,7 @@ prune_redundant([], _) -> [].
replace_comp_op(To, Reg, Op, Ops, D) ->
False = comp_op_find_shortcut(To, Reg, {atom,false}, D),
True = comp_op_find_shortcut(To, Reg, {atom,true}, D),
- [bif_to_test(Op, Ops, False),{jump,{f,True}}].
+ {bif_to_test(Op, Ops, False),{jump,{f,True}}}.
comp_op_find_shortcut(To0, Reg, Val, D) ->
case shortcut_select_label(To0, Reg, Val, D) of
@@ -483,15 +484,22 @@ not_possible() -> throw(not_possible).
%% F1: is_eq_exact F2 Reg Lit2 F1: is_eq_exact F2 Reg Lit2
%% L2: .... L2:
%%
-combine_eqs(To, [Reg,{Type,_}=Lit1]=Ops, D, [{label,L1}|_])
- when Type =:= atom; Type =:= integer ->
+combine_eqs(To, [Reg,{Type,_}=Lit1]=Ops, D, Acc)
+ when Type =:= atom; Type =:= integer ->
+ Next = case Acc of
+ [{label,Lbl}|_] -> Lbl;
+ [{jump,{f,Lbl}}|_] -> Lbl
+ end,
case beam_utils:code_at(To, D) of
[{test,is_eq_exact,{f,F2},[Reg,{Type,_}=Lit2]},
{label,L2}|_] when Lit1 =/= Lit2 ->
- {select,select_val,Reg,{f,F2},[Lit1,{f,L1},Lit2,{f,L2}]};
+ {select,select_val,Reg,{f,F2},[Lit1,{f,Next},Lit2,{f,L2}]};
+ [{test,is_eq_exact,{f,F2},[Reg,{Type,_}=Lit2]},
+ {jump,{f,L2}}|_] when Lit1 =/= Lit2 ->
+ {select,select_val,Reg,{f,F2},[Lit1,{f,Next},Lit2,{f,L2}]};
[{select,select_val,Reg,{f,F2},[{Type,_}|_]=List0}|_] ->
List = remove_from_list(Lit1, List0),
- {select,select_val,Reg,{f,F2},[Lit1,{f,L1}|List]};
+ {select,select_val,Reg,{f,F2},[Lit1,{f,Next}|List]};
_Is ->
{test,is_eq_exact,{f,To},Ops}
end;
diff --git a/lib/compiler/src/core_lint.erl b/lib/compiler/src/core_lint.erl
index 7d3513c0ba..6e2114be56 100644
--- a/lib/compiler/src/core_lint.erl
+++ b/lib/compiler/src/core_lint.erl
@@ -353,12 +353,6 @@ expr(#c_case{arg=Arg,clauses=Cs}, Def, Rt, St0) ->
Pc = case_patcount(Cs),
St1 = body(Arg, Def, Pc, St0),
clauses(Cs, Def, Pc, Rt, St1);
-expr(#c_receive{clauses=Cs,timeout=#c_literal{val=infinity},
- action=#c_literal{}},
- Def, Rt, St) ->
- %% If the timeout is 'infinity', the after code can never
- %% be reached. We don't care if the return count is wrong.
- clauses(Cs, Def, 1, Rt, St);
expr(#c_receive{clauses=Cs,timeout=T,action=A}, Def, Rt, St0) ->
St1 = expr(T, Def, 1, St0),
St2 = body(A, Def, Rt, St1),
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index df880ff784..15ced196a7 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -2622,9 +2622,13 @@ delay_build_expr_1(#c_receive{clauses=Cs0,
timeout=Timeout,
action=A0}=Rec, TypeSig) ->
Cs = delay_build_cs(Cs0, TypeSig),
- A = case Timeout of
- #c_literal{val=infinity} -> A0;
- _ -> delay_build_expr(A0, TypeSig)
+ A = case {Timeout,A0} of
+ {#c_literal{val=infinity},#c_literal{}} ->
+ {_Type,Arity} = TypeSig,
+ Es = lists:duplicate(Arity, A0),
+ core_lib:make_values(Es);
+ _ ->
+ delay_build_expr(A0, TypeSig)
end,
Rec#c_receive{clauses=Cs,action=A};
delay_build_expr_1(#c_seq{body=B0}=Seq, TypeSig) ->
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index b222b25d7c..409adcb546 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -77,10 +77,15 @@ functions(Forms, AtomMod) ->
function(#k_fdef{anno=#k{a=Anno},func=Name,arity=Arity,
vars=As,body=Kb}, AtomMod, St0) ->
try
- %% Annotate kernel records with variable usage.
#k_match{} = Kb, %Assertion.
+
+ %% Try to suppress the stack frame unless it is
+ %% really needed.
+ Body0 = avoid_stack_frame(Kb),
+
+ %% Annotate kernel records with variable usage.
Vdb0 = init_vars(As),
- {Body,_,Vdb} = body(Kb, 1, Vdb0),
+ {Body,_,Vdb} = body(Body0, 1, Vdb0),
%% Generate the BEAM assembly code.
{Asm,EntryLabel,St} = cg_fun(Body, As, Vdb, AtomMod,
@@ -94,6 +99,112 @@ function(#k_fdef{anno=#k{a=Anno},func=Name,arity=Arity,
erlang:raise(Class, Error, Stack)
end.
+
+%% avoid_stack_frame(Kernel) -> Kernel'
+%% If possible, avoid setting up a stack frame. Functions
+%% that only do matching, calls to guard BIFs, and tail-recursive
+%% calls don't need a stack frame.
+
+avoid_stack_frame(#k_match{body=Body}=M) ->
+ try
+ M#k_match{body=avoid_stack_frame_1(Body)}
+ catch
+ impossible ->
+ M
+ end.
+
+avoid_stack_frame_1(#k_alt{first=First0,then=Then0}=Alt) ->
+ First = avoid_stack_frame_1(First0),
+ Then = avoid_stack_frame_1(Then0),
+ Alt#k_alt{first=First,then=Then};
+avoid_stack_frame_1(#k_bif{op=Op}=Bif) ->
+ case Op of
+ #k_internal{} ->
+ %% Most internal BIFs clobber the X registers.
+ throw(impossible);
+ _ ->
+ Bif
+ end;
+avoid_stack_frame_1(#k_break{anno=Anno,args=Args}) ->
+ #k_guard_break{anno=Anno,args=Args};
+avoid_stack_frame_1(#k_guard_break{}=Break) ->
+ Break;
+avoid_stack_frame_1(#k_enter{}=Enter) ->
+ %% Tail-recursive calls don't need a stack frame.
+ Enter;
+avoid_stack_frame_1(#k_guard{clauses=Cs0}=Guard) ->
+ Cs = avoid_stack_frame_list(Cs0),
+ Guard#k_guard{clauses=Cs};
+avoid_stack_frame_1(#k_guard_clause{guard=G0,body=B0}=C) ->
+ G = avoid_stack_frame_1(G0),
+ B = avoid_stack_frame_1(B0),
+ C#k_guard_clause{guard=G,body=B};
+avoid_stack_frame_1(#k_match{anno=A,vars=Vs,body=B0,ret=Ret}) ->
+ %% Use #k_guard_match{} instead to avoid saving the X registers
+ %% to the stack before matching.
+ B = avoid_stack_frame_1(B0),
+ #k_guard_match{anno=A,vars=Vs,body=B,ret=Ret};
+avoid_stack_frame_1(#k_guard_match{body=B0}=M) ->
+ B = avoid_stack_frame_1(B0),
+ M#k_guard_match{body=B};
+avoid_stack_frame_1(#k_protected{arg=Arg0}=Prot) ->
+ Arg = avoid_stack_frame_1(Arg0),
+ Prot#k_protected{arg=Arg};
+avoid_stack_frame_1(#k_put{}=Put) ->
+ Put;
+avoid_stack_frame_1(#k_return{}=Ret) ->
+ Ret;
+avoid_stack_frame_1(#k_select{var=#k_var{anno=Vanno},types=Types0}=Select) ->
+ case member(reuse_for_context, Vanno) of
+ false ->
+ Types = avoid_stack_frame_list(Types0),
+ Select#k_select{types=Types};
+ true ->
+ %% Including binary patterns that overwrite the register containing
+ %% the binary with the match context may not be safe. For example,
+ %% bs_match_SUITE:bin_tail_e/1 with inlining will be rejected by
+ %% beam_validator.
+ %%
+ %% Essentially the following code is produced:
+ %%
+ %% bs_match {x,0} => {x,0}
+ %% ...
+ %% bs_match {x,0} => {x,1} %% ILLEGAL
+ %%
+ %% A bs_match instruction will only accept a match context as the
+ %% source operand if the source and destination registers are the
+ %% the same (as in the first bs_match instruction above).
+ %% The second bs_match instruction is therefore illegal.
+ %%
+ %% This situation is avoided if there is a stack frame:
+ %%
+ %% move {x,0} => {y,0}
+ %% bs_match {x,0} => {x,0}
+ %% ...
+ %% bs_match {y,0} => {x,1} %% LEGAL
+ %%
+ throw(impossible)
+ end;
+avoid_stack_frame_1(#k_seq{arg=A0,body=B0}=Seq) ->
+ A = avoid_stack_frame_1(A0),
+ B = avoid_stack_frame_1(B0),
+ Seq#k_seq{arg=A,body=B};
+avoid_stack_frame_1(#k_test{}=Test) ->
+ Test;
+avoid_stack_frame_1(#k_type_clause{values=Values0}=TC) ->
+ Values = avoid_stack_frame_list(Values0),
+ TC#k_type_clause{values=Values};
+avoid_stack_frame_1(#k_val_clause{body=B0}=VC) ->
+ B = avoid_stack_frame_1(B0),
+ VC#k_val_clause{body=B};
+avoid_stack_frame_1(_Body) ->
+ throw(impossible).
+
+avoid_stack_frame_list([H|T]) ->
+ [avoid_stack_frame_1(H)|avoid_stack_frame_list(T)];
+avoid_stack_frame_list([]) -> [].
+
+
%% This pass creates beam format annotated with variable lifetime
%% information. Each thing is given an index and for each variable we
%% store the first and last index for its occurrence. The variable
@@ -487,7 +598,10 @@ match_cg(M, Rs, Le, Vdb, Bef, St0) ->
guard_match_cg(M, Rs, Le, Vdb, Bef, St0) ->
I = Le#l.i,
{B,St1} = new_label(St0),
- #cg{bfail=Fail} = St1,
+ Fail = case St0 of
+ #cg{bfail=0,ultimate_failure=Fail0} -> Fail0;
+ #cg{bfail=Fail0} -> Fail0
+ end,
{Mis,Aft,St2} = match_cg(M, Fail, Bef, St1#cg{break=B}),
%% Update the register descriptors for the return registers.
Reg = guard_match_regs(Aft#sr.reg, Rs),
diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl
index 20cb3343fb..66ddf6fcb9 100644
--- a/lib/compiler/src/v3_core.erl
+++ b/lib/compiler/src/v3_core.erl
@@ -2462,9 +2462,11 @@ cexpr(#icase{anno=A,args=Largs,clauses=Lcs,fc=Lfc}, As, St0) ->
cexpr(#ireceive1{anno=A,clauses=Lcs}, As, St0) ->
Exp = intersection(A#a.ns, As), %Exports
{Ccs,St1} = cclauses(Lcs, Exp, St0),
+ True = #c_literal{val=true},
+ Action = core_lib:make_values(lists:duplicate(1+length(Exp), True)),
{#c_receive{anno=A#a.anno,
clauses=Ccs,
- timeout=#c_literal{val=infinity},action=#c_literal{val=true}},
+ timeout=#c_literal{val=infinity},action=Action},
Exp,A#a.us,St1};
cexpr(#ireceive2{anno=A,clauses=Lcs,timeout=Lto,action=Les}, As, St0) ->
Exp = intersection(A#a.ns, As), %Exports
diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl
index 3eea058153..23625b1f2e 100644
--- a/lib/compiler/src/v3_kernel.erl
+++ b/lib/compiler/src/v3_kernel.erl
@@ -108,6 +108,7 @@ copy_anno(Kdst, Ksrc) ->
-record(iclause, {anno=[],isub,osub,pats,guard,body}).
-record(ireceive_accept, {anno=[],arg}).
-record(ireceive_next, {anno=[],arg}).
+-record(ignored, {anno=[]}).
-type warning() :: term(). % XXX: REFINE
@@ -489,7 +490,7 @@ make_alt(First0, Then0) ->
Then1 = pre_seq(droplast(Then0), last(Then0)),
First2 = make_protected(First1),
Then2 = make_protected(Then1),
- Body = #k_atom{val=ignored},
+ Body = #ignored{},
First3 = #k_guard_clause{guard=First2,body=Body},
Then3 = #k_guard_clause{guard=Then2,body=Body},
First = #k_guard{clauses=[First3]},
@@ -2225,7 +2226,9 @@ ubody(E, return, St0) ->
{Ea,Pa,St1} = force_atomic(E, St0),
ubody(pre_seq(Pa, #ivalues{args=[Ea]}), return, St1)
end;
-ubody(E, {break,_Rs} = Break, St0) ->
+ubody(#ignored{}, {break,_} = Break, St) ->
+ ubody(#ivalues{args=[]}, Break, St);
+ubody(E, {break,[_]} = Break, St0) ->
%%ok = io:fwrite("ubody ~w:~p~n", [?LINE,{E,Br}]),
%% Exiting expressions need no trailing break.
case is_exit_expr(E) of
@@ -2233,6 +2236,16 @@ ubody(E, {break,_Rs} = Break, St0) ->
false ->
{Ea,Pa,St1} = force_atomic(E, St0),
ubody(pre_seq(Pa, #ivalues{args=[Ea]}), Break, St1)
+ end;
+ubody(E, {break,Rs}=Break, St0) ->
+ case is_exit_expr(E) of
+ true ->
+ uexpr(E, return, St0);
+ false ->
+ {Vs,St1} = new_vars(length(Rs), St0),
+ Iset = #iset{vars=Vs,arg=E},
+ PreSeq = pre_seq([Iset], #ivalues{args=Vs}),
+ ubody(PreSeq, Break, St1)
end.
iletrec_funs(#iletrec{defs=Fs}, St0) ->
diff --git a/lib/compiler/test/match_SUITE.erl b/lib/compiler/test/match_SUITE.erl
index 35d2e8e91a..4b26a8dcdc 100644
--- a/lib/compiler/test/match_SUITE.erl
+++ b/lib/compiler/test/match_SUITE.erl
@@ -24,7 +24,8 @@
pmatch/1,mixed/1,aliases/1,non_matching_aliases/1,
match_in_call/1,untuplify/1,shortcut_boolean/1,letify_guard/1,
selectify/1,deselectify/1,underscore/1,match_map/1,map_vars_used/1,
- coverage/1,grab_bag/1,literal_binary/1]).
+ coverage/1,grab_bag/1,literal_binary/1,
+ unary_op/1]).
-include_lib("common_test/include/ct.hrl").
@@ -40,7 +41,7 @@ groups() ->
match_in_call,untuplify,
shortcut_boolean,letify_guard,selectify,deselectify,
underscore,match_map,map_vars_used,coverage,
- grab_bag,literal_binary]}].
+ grab_bag,literal_binary,unary_op]}].
init_per_suite(Config) ->
@@ -662,5 +663,74 @@ literal_binary_match(_, <<"x">>) -> 2;
literal_binary_match(_, <<"y">>) -> 3;
literal_binary_match(_, _) -> fail.
+unary_op(Config) ->
+ %% ERL-514. This test case only verifies that the code
+ %% calculates the correct result, not that the generated
+ %% code is optimial.
+
+ {non_associative,30} = unary_op_1('&'),
+ {non_associative,300} = unary_op_1('^'),
+ {non_associative,300} = unary_op_1('not'),
+ {non_associative,300} = unary_op_1('+'),
+ {non_associative,300} = unary_op_1('-'),
+ {non_associative,300} = unary_op_1('~~~'),
+ {non_associative,300} = unary_op_1('!'),
+ {non_associative,320} = unary_op_1('@'),
+
+ error = unary_op_1(Config),
+ error = unary_op_1(abc),
+ error = unary_op_1(42),
+
+ ok.
+
+unary_op_1(Vop@1) ->
+ %% If all optimizations are working as they should, there should
+ %% be no stack frame and all '=:=' tests should be coalesced into
+ %% a single select_val instruction.
+
+ case Vop@1 =:= '&' of
+ true ->
+ {non_associative,30};
+ false ->
+ case
+ case Vop@1 =:= '^' of
+ true ->
+ true;
+ false ->
+ case Vop@1 =:= 'not' of
+ true ->
+ true;
+ false ->
+ case Vop@1 =:= '+' of
+ true ->
+ true;
+ false ->
+ case Vop@1 =:= '-' of
+ true ->
+ true;
+ false ->
+ case Vop@1 =:= '~~~' of
+ true ->
+ true;
+ false ->
+ Vop@1 =:= '!'
+ end
+ end
+ end
+ end
+ end
+ of
+ true ->
+ {non_associative,300};
+ false ->
+ case Vop@1 =:= '@' of
+ true ->
+ {non_associative,320};
+ false ->
+ error
+ end
+ end
+ end.
+
id(I) -> I.
diff --git a/lib/compiler/test/receive_SUITE.erl b/lib/compiler/test/receive_SUITE.erl
index 8304672558..3001bb421b 100644
--- a/lib/compiler/test/receive_SUITE.erl
+++ b/lib/compiler/test/receive_SUITE.erl
@@ -265,6 +265,10 @@ export(Config) when is_list(Config) ->
self() ! {result,Ref,42},
42 = export_1(Ref),
{error,timeout} = export_1(Ref),
+
+ self() ! {result,Ref},
+ {ok,Ref} = export_2(),
+
ok.
export_1(Reference) ->
@@ -281,6 +285,10 @@ export_1(Reference) ->
id({build,self()}),
Result.
+export_2() ->
+ receive {result,Result} -> ok end,
+ {ok,Result}.
+
wait(Config) when is_list(Config) ->
self() ! <<42>>,
<<42>> = wait_1(r, 1, 2),