diff options
author | Björn Gustavsson <[email protected]> | 2017-12-07 10:07:25 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2017-12-07 10:07:25 +0100 |
commit | 71bb60815630574bf710b8ce8a7d3b0e48b5a985 (patch) | |
tree | dd8bb2e46c8e82c6190233f2c84a8bbe7471466b /lib/compiler | |
parent | be9f93b14e0fb9ff09db36abde62ae8099bf5bd0 (diff) | |
parent | f33b4f5cf611472c631c30e8d6bbde83f2fb2058 (diff) | |
download | otp-71bb60815630574bf710b8ce8a7d3b0e48b5a985.tar.gz otp-71bb60815630574bf710b8ce8a7d3b0e48b5a985.tar.bz2 otp-71bb60815630574bf710b8ce8a7d3b0e48b5a985.zip |
Merge pull request #1652 from bjorng/bjorn/compiler/fix-excessive-allocations/ERL-514
Avoid excessive stack frame allocation
OTP-14808
Diffstat (limited to 'lib/compiler')
-rw-r--r-- | lib/compiler/src/beam_dead.erl | 20 | ||||
-rw-r--r-- | lib/compiler/src/core_lint.erl | 6 | ||||
-rw-r--r-- | lib/compiler/src/sys_core_fold.erl | 10 | ||||
-rw-r--r-- | lib/compiler/src/v3_codegen.erl | 120 | ||||
-rw-r--r-- | lib/compiler/src/v3_core.erl | 4 | ||||
-rw-r--r-- | lib/compiler/src/v3_kernel.erl | 17 | ||||
-rw-r--r-- | lib/compiler/test/match_SUITE.erl | 74 | ||||
-rw-r--r-- | lib/compiler/test/receive_SUITE.erl | 8 |
8 files changed, 236 insertions, 23 deletions
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl index d379fdc4eb..1b152a2d6f 100644 --- a/lib/compiler/src/beam_dead.erl +++ b/lib/compiler/src/beam_dead.erl @@ -272,7 +272,8 @@ backward([{jump,{f,To0}},{move,Src,Reg}=Move|Is], D, Acc) -> end; backward([{jump,{f,To}}=J|[{bif,Op,{f,BifFail},Ops,Reg}|Is]=Is0], D, Acc) -> try replace_comp_op(To, Reg, Op, Ops, D) of - I -> backward(Is, D, I++Acc) + {Test,Jump} -> + backward([Jump,Test|Is], D, Acc) catch throw:not_possible -> case To =:= BifFail of @@ -446,7 +447,7 @@ prune_redundant([], _) -> []. replace_comp_op(To, Reg, Op, Ops, D) -> False = comp_op_find_shortcut(To, Reg, {atom,false}, D), True = comp_op_find_shortcut(To, Reg, {atom,true}, D), - [bif_to_test(Op, Ops, False),{jump,{f,True}}]. + {bif_to_test(Op, Ops, False),{jump,{f,True}}}. comp_op_find_shortcut(To0, Reg, Val, D) -> case shortcut_select_label(To0, Reg, Val, D) of @@ -483,15 +484,22 @@ not_possible() -> throw(not_possible). %% F1: is_eq_exact F2 Reg Lit2 F1: is_eq_exact F2 Reg Lit2 %% L2: .... L2: %% -combine_eqs(To, [Reg,{Type,_}=Lit1]=Ops, D, [{label,L1}|_]) - when Type =:= atom; Type =:= integer -> +combine_eqs(To, [Reg,{Type,_}=Lit1]=Ops, D, Acc) + when Type =:= atom; Type =:= integer -> + Next = case Acc of + [{label,Lbl}|_] -> Lbl; + [{jump,{f,Lbl}}|_] -> Lbl + end, case beam_utils:code_at(To, D) of [{test,is_eq_exact,{f,F2},[Reg,{Type,_}=Lit2]}, {label,L2}|_] when Lit1 =/= Lit2 -> - {select,select_val,Reg,{f,F2},[Lit1,{f,L1},Lit2,{f,L2}]}; + {select,select_val,Reg,{f,F2},[Lit1,{f,Next},Lit2,{f,L2}]}; + [{test,is_eq_exact,{f,F2},[Reg,{Type,_}=Lit2]}, + {jump,{f,L2}}|_] when Lit1 =/= Lit2 -> + {select,select_val,Reg,{f,F2},[Lit1,{f,Next},Lit2,{f,L2}]}; [{select,select_val,Reg,{f,F2},[{Type,_}|_]=List0}|_] -> List = remove_from_list(Lit1, List0), - {select,select_val,Reg,{f,F2},[Lit1,{f,L1}|List]}; + {select,select_val,Reg,{f,F2},[Lit1,{f,Next}|List]}; _Is -> {test,is_eq_exact,{f,To},Ops} end; diff --git a/lib/compiler/src/core_lint.erl b/lib/compiler/src/core_lint.erl index 7d3513c0ba..6e2114be56 100644 --- a/lib/compiler/src/core_lint.erl +++ b/lib/compiler/src/core_lint.erl @@ -353,12 +353,6 @@ expr(#c_case{arg=Arg,clauses=Cs}, Def, Rt, St0) -> Pc = case_patcount(Cs), St1 = body(Arg, Def, Pc, St0), clauses(Cs, Def, Pc, Rt, St1); -expr(#c_receive{clauses=Cs,timeout=#c_literal{val=infinity}, - action=#c_literal{}}, - Def, Rt, St) -> - %% If the timeout is 'infinity', the after code can never - %% be reached. We don't care if the return count is wrong. - clauses(Cs, Def, 1, Rt, St); expr(#c_receive{clauses=Cs,timeout=T,action=A}, Def, Rt, St0) -> St1 = expr(T, Def, 1, St0), St2 = body(A, Def, Rt, St1), diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl index df880ff784..15ced196a7 100644 --- a/lib/compiler/src/sys_core_fold.erl +++ b/lib/compiler/src/sys_core_fold.erl @@ -2622,9 +2622,13 @@ delay_build_expr_1(#c_receive{clauses=Cs0, timeout=Timeout, action=A0}=Rec, TypeSig) -> Cs = delay_build_cs(Cs0, TypeSig), - A = case Timeout of - #c_literal{val=infinity} -> A0; - _ -> delay_build_expr(A0, TypeSig) + A = case {Timeout,A0} of + {#c_literal{val=infinity},#c_literal{}} -> + {_Type,Arity} = TypeSig, + Es = lists:duplicate(Arity, A0), + core_lib:make_values(Es); + _ -> + delay_build_expr(A0, TypeSig) end, Rec#c_receive{clauses=Cs,action=A}; delay_build_expr_1(#c_seq{body=B0}=Seq, TypeSig) -> diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl index b222b25d7c..409adcb546 100644 --- a/lib/compiler/src/v3_codegen.erl +++ b/lib/compiler/src/v3_codegen.erl @@ -77,10 +77,15 @@ functions(Forms, AtomMod) -> function(#k_fdef{anno=#k{a=Anno},func=Name,arity=Arity, vars=As,body=Kb}, AtomMod, St0) -> try - %% Annotate kernel records with variable usage. #k_match{} = Kb, %Assertion. + + %% Try to suppress the stack frame unless it is + %% really needed. + Body0 = avoid_stack_frame(Kb), + + %% Annotate kernel records with variable usage. Vdb0 = init_vars(As), - {Body,_,Vdb} = body(Kb, 1, Vdb0), + {Body,_,Vdb} = body(Body0, 1, Vdb0), %% Generate the BEAM assembly code. {Asm,EntryLabel,St} = cg_fun(Body, As, Vdb, AtomMod, @@ -94,6 +99,112 @@ function(#k_fdef{anno=#k{a=Anno},func=Name,arity=Arity, erlang:raise(Class, Error, Stack) end. + +%% avoid_stack_frame(Kernel) -> Kernel' +%% If possible, avoid setting up a stack frame. Functions +%% that only do matching, calls to guard BIFs, and tail-recursive +%% calls don't need a stack frame. + +avoid_stack_frame(#k_match{body=Body}=M) -> + try + M#k_match{body=avoid_stack_frame_1(Body)} + catch + impossible -> + M + end. + +avoid_stack_frame_1(#k_alt{first=First0,then=Then0}=Alt) -> + First = avoid_stack_frame_1(First0), + Then = avoid_stack_frame_1(Then0), + Alt#k_alt{first=First,then=Then}; +avoid_stack_frame_1(#k_bif{op=Op}=Bif) -> + case Op of + #k_internal{} -> + %% Most internal BIFs clobber the X registers. + throw(impossible); + _ -> + Bif + end; +avoid_stack_frame_1(#k_break{anno=Anno,args=Args}) -> + #k_guard_break{anno=Anno,args=Args}; +avoid_stack_frame_1(#k_guard_break{}=Break) -> + Break; +avoid_stack_frame_1(#k_enter{}=Enter) -> + %% Tail-recursive calls don't need a stack frame. + Enter; +avoid_stack_frame_1(#k_guard{clauses=Cs0}=Guard) -> + Cs = avoid_stack_frame_list(Cs0), + Guard#k_guard{clauses=Cs}; +avoid_stack_frame_1(#k_guard_clause{guard=G0,body=B0}=C) -> + G = avoid_stack_frame_1(G0), + B = avoid_stack_frame_1(B0), + C#k_guard_clause{guard=G,body=B}; +avoid_stack_frame_1(#k_match{anno=A,vars=Vs,body=B0,ret=Ret}) -> + %% Use #k_guard_match{} instead to avoid saving the X registers + %% to the stack before matching. + B = avoid_stack_frame_1(B0), + #k_guard_match{anno=A,vars=Vs,body=B,ret=Ret}; +avoid_stack_frame_1(#k_guard_match{body=B0}=M) -> + B = avoid_stack_frame_1(B0), + M#k_guard_match{body=B}; +avoid_stack_frame_1(#k_protected{arg=Arg0}=Prot) -> + Arg = avoid_stack_frame_1(Arg0), + Prot#k_protected{arg=Arg}; +avoid_stack_frame_1(#k_put{}=Put) -> + Put; +avoid_stack_frame_1(#k_return{}=Ret) -> + Ret; +avoid_stack_frame_1(#k_select{var=#k_var{anno=Vanno},types=Types0}=Select) -> + case member(reuse_for_context, Vanno) of + false -> + Types = avoid_stack_frame_list(Types0), + Select#k_select{types=Types}; + true -> + %% Including binary patterns that overwrite the register containing + %% the binary with the match context may not be safe. For example, + %% bs_match_SUITE:bin_tail_e/1 with inlining will be rejected by + %% beam_validator. + %% + %% Essentially the following code is produced: + %% + %% bs_match {x,0} => {x,0} + %% ... + %% bs_match {x,0} => {x,1} %% ILLEGAL + %% + %% A bs_match instruction will only accept a match context as the + %% source operand if the source and destination registers are the + %% the same (as in the first bs_match instruction above). + %% The second bs_match instruction is therefore illegal. + %% + %% This situation is avoided if there is a stack frame: + %% + %% move {x,0} => {y,0} + %% bs_match {x,0} => {x,0} + %% ... + %% bs_match {y,0} => {x,1} %% LEGAL + %% + throw(impossible) + end; +avoid_stack_frame_1(#k_seq{arg=A0,body=B0}=Seq) -> + A = avoid_stack_frame_1(A0), + B = avoid_stack_frame_1(B0), + Seq#k_seq{arg=A,body=B}; +avoid_stack_frame_1(#k_test{}=Test) -> + Test; +avoid_stack_frame_1(#k_type_clause{values=Values0}=TC) -> + Values = avoid_stack_frame_list(Values0), + TC#k_type_clause{values=Values}; +avoid_stack_frame_1(#k_val_clause{body=B0}=VC) -> + B = avoid_stack_frame_1(B0), + VC#k_val_clause{body=B}; +avoid_stack_frame_1(_Body) -> + throw(impossible). + +avoid_stack_frame_list([H|T]) -> + [avoid_stack_frame_1(H)|avoid_stack_frame_list(T)]; +avoid_stack_frame_list([]) -> []. + + %% This pass creates beam format annotated with variable lifetime %% information. Each thing is given an index and for each variable we %% store the first and last index for its occurrence. The variable @@ -487,7 +598,10 @@ match_cg(M, Rs, Le, Vdb, Bef, St0) -> guard_match_cg(M, Rs, Le, Vdb, Bef, St0) -> I = Le#l.i, {B,St1} = new_label(St0), - #cg{bfail=Fail} = St1, + Fail = case St0 of + #cg{bfail=0,ultimate_failure=Fail0} -> Fail0; + #cg{bfail=Fail0} -> Fail0 + end, {Mis,Aft,St2} = match_cg(M, Fail, Bef, St1#cg{break=B}), %% Update the register descriptors for the return registers. Reg = guard_match_regs(Aft#sr.reg, Rs), diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl index 20cb3343fb..66ddf6fcb9 100644 --- a/lib/compiler/src/v3_core.erl +++ b/lib/compiler/src/v3_core.erl @@ -2462,9 +2462,11 @@ cexpr(#icase{anno=A,args=Largs,clauses=Lcs,fc=Lfc}, As, St0) -> cexpr(#ireceive1{anno=A,clauses=Lcs}, As, St0) -> Exp = intersection(A#a.ns, As), %Exports {Ccs,St1} = cclauses(Lcs, Exp, St0), + True = #c_literal{val=true}, + Action = core_lib:make_values(lists:duplicate(1+length(Exp), True)), {#c_receive{anno=A#a.anno, clauses=Ccs, - timeout=#c_literal{val=infinity},action=#c_literal{val=true}}, + timeout=#c_literal{val=infinity},action=Action}, Exp,A#a.us,St1}; cexpr(#ireceive2{anno=A,clauses=Lcs,timeout=Lto,action=Les}, As, St0) -> Exp = intersection(A#a.ns, As), %Exports diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl index 3eea058153..23625b1f2e 100644 --- a/lib/compiler/src/v3_kernel.erl +++ b/lib/compiler/src/v3_kernel.erl @@ -108,6 +108,7 @@ copy_anno(Kdst, Ksrc) -> -record(iclause, {anno=[],isub,osub,pats,guard,body}). -record(ireceive_accept, {anno=[],arg}). -record(ireceive_next, {anno=[],arg}). +-record(ignored, {anno=[]}). -type warning() :: term(). % XXX: REFINE @@ -489,7 +490,7 @@ make_alt(First0, Then0) -> Then1 = pre_seq(droplast(Then0), last(Then0)), First2 = make_protected(First1), Then2 = make_protected(Then1), - Body = #k_atom{val=ignored}, + Body = #ignored{}, First3 = #k_guard_clause{guard=First2,body=Body}, Then3 = #k_guard_clause{guard=Then2,body=Body}, First = #k_guard{clauses=[First3]}, @@ -2225,7 +2226,9 @@ ubody(E, return, St0) -> {Ea,Pa,St1} = force_atomic(E, St0), ubody(pre_seq(Pa, #ivalues{args=[Ea]}), return, St1) end; -ubody(E, {break,_Rs} = Break, St0) -> +ubody(#ignored{}, {break,_} = Break, St) -> + ubody(#ivalues{args=[]}, Break, St); +ubody(E, {break,[_]} = Break, St0) -> %%ok = io:fwrite("ubody ~w:~p~n", [?LINE,{E,Br}]), %% Exiting expressions need no trailing break. case is_exit_expr(E) of @@ -2233,6 +2236,16 @@ ubody(E, {break,_Rs} = Break, St0) -> false -> {Ea,Pa,St1} = force_atomic(E, St0), ubody(pre_seq(Pa, #ivalues{args=[Ea]}), Break, St1) + end; +ubody(E, {break,Rs}=Break, St0) -> + case is_exit_expr(E) of + true -> + uexpr(E, return, St0); + false -> + {Vs,St1} = new_vars(length(Rs), St0), + Iset = #iset{vars=Vs,arg=E}, + PreSeq = pre_seq([Iset], #ivalues{args=Vs}), + ubody(PreSeq, Break, St1) end. iletrec_funs(#iletrec{defs=Fs}, St0) -> diff --git a/lib/compiler/test/match_SUITE.erl b/lib/compiler/test/match_SUITE.erl index 35d2e8e91a..4b26a8dcdc 100644 --- a/lib/compiler/test/match_SUITE.erl +++ b/lib/compiler/test/match_SUITE.erl @@ -24,7 +24,8 @@ pmatch/1,mixed/1,aliases/1,non_matching_aliases/1, match_in_call/1,untuplify/1,shortcut_boolean/1,letify_guard/1, selectify/1,deselectify/1,underscore/1,match_map/1,map_vars_used/1, - coverage/1,grab_bag/1,literal_binary/1]). + coverage/1,grab_bag/1,literal_binary/1, + unary_op/1]). -include_lib("common_test/include/ct.hrl"). @@ -40,7 +41,7 @@ groups() -> match_in_call,untuplify, shortcut_boolean,letify_guard,selectify,deselectify, underscore,match_map,map_vars_used,coverage, - grab_bag,literal_binary]}]. + grab_bag,literal_binary,unary_op]}]. init_per_suite(Config) -> @@ -662,5 +663,74 @@ literal_binary_match(_, <<"x">>) -> 2; literal_binary_match(_, <<"y">>) -> 3; literal_binary_match(_, _) -> fail. +unary_op(Config) -> + %% ERL-514. This test case only verifies that the code + %% calculates the correct result, not that the generated + %% code is optimial. + + {non_associative,30} = unary_op_1('&'), + {non_associative,300} = unary_op_1('^'), + {non_associative,300} = unary_op_1('not'), + {non_associative,300} = unary_op_1('+'), + {non_associative,300} = unary_op_1('-'), + {non_associative,300} = unary_op_1('~~~'), + {non_associative,300} = unary_op_1('!'), + {non_associative,320} = unary_op_1('@'), + + error = unary_op_1(Config), + error = unary_op_1(abc), + error = unary_op_1(42), + + ok. + +unary_op_1(Vop@1) -> + %% If all optimizations are working as they should, there should + %% be no stack frame and all '=:=' tests should be coalesced into + %% a single select_val instruction. + + case Vop@1 =:= '&' of + true -> + {non_associative,30}; + false -> + case + case Vop@1 =:= '^' of + true -> + true; + false -> + case Vop@1 =:= 'not' of + true -> + true; + false -> + case Vop@1 =:= '+' of + true -> + true; + false -> + case Vop@1 =:= '-' of + true -> + true; + false -> + case Vop@1 =:= '~~~' of + true -> + true; + false -> + Vop@1 =:= '!' + end + end + end + end + end + of + true -> + {non_associative,300}; + false -> + case Vop@1 =:= '@' of + true -> + {non_associative,320}; + false -> + error + end + end + end. + id(I) -> I. diff --git a/lib/compiler/test/receive_SUITE.erl b/lib/compiler/test/receive_SUITE.erl index 8304672558..3001bb421b 100644 --- a/lib/compiler/test/receive_SUITE.erl +++ b/lib/compiler/test/receive_SUITE.erl @@ -265,6 +265,10 @@ export(Config) when is_list(Config) -> self() ! {result,Ref,42}, 42 = export_1(Ref), {error,timeout} = export_1(Ref), + + self() ! {result,Ref}, + {ok,Ref} = export_2(), + ok. export_1(Reference) -> @@ -281,6 +285,10 @@ export_1(Reference) -> id({build,self()}), Result. +export_2() -> + receive {result,Result} -> ok end, + {ok,Result}. + wait(Config) when is_list(Config) -> self() ! <<42>>, <<42>> = wait_1(r, 1, 2), |