From 44ee726f87130ef65af350d00300384c36595b2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 9 May 2018 15:29:33 +0200 Subject: v3_core: Number argument variables in ascending order Nicer to read and less confusion. --- lib/compiler/src/v3_core.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl index 3b746ab5bf..5aaf5bfd51 100644 --- a/lib/compiler/src/v3_core.erl +++ b/lib/compiler/src/v3_core.erl @@ -228,7 +228,8 @@ function({function,_,Name,Arity,Cs0}, Ws0, File, Opts) -> body(Cs0, Name, Arity, St0) -> Anno = lineno_anno(element(2, hd(Cs0)), St0), - {Args,St1} = new_vars(Anno, Arity, St0), + {Args0,St1} = new_vars(Anno, Arity, St0), + Args = reverse(Args0), %Nicer order case clauses(Cs0, St1) of {Cs1,[],St2} -> {Ps,St3} = new_vars(Arity, St2), %Need new variables here -- cgit v1.2.3 From ec2f6ef0d5170220d39ad3a8e763fe2e5b9b7bb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 6 Mar 2018 09:11:59 +0100 Subject: beam_trim: Handle an empty list of instructions --- lib/compiler/src/beam_trim.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_trim.erl b/lib/compiler/src/beam_trim.erl index 4da0985085..1acbedd45b 100644 --- a/lib/compiler/src/beam_trim.erl +++ b/lib/compiler/src/beam_trim.erl @@ -288,7 +288,7 @@ frame_size([{get_map_elements,{f,L},_,_}|Is], Safe) -> frame_size([{deallocate,N}|_], _) -> N; frame_size([{line,_}|Is], Safe) -> frame_size(Is, Safe); -frame_size([_|_], _) -> throw(not_possible). +frame_size(_, _) -> throw(not_possible). frame_size_branch(0, Is, Safe) -> frame_size(Is, Safe); -- cgit v1.2.3 From 22f0a7f6356bf798eaff13402426b99ce8457b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 7 Mar 2018 06:08:43 +0100 Subject: beam_validator: Be more careful when updating try/catch state The new code generator will more aggressively reuse registers, so we must be more careful about updating the state for try/catch. In particular, an "empty" try/catch that can't throw an exception must not update the try/catch state. --- lib/compiler/src/beam_validator.erl | 58 +++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 28 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index c09dbadd7f..c85e8f53ac 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -27,7 +27,7 @@ %% Interface for compiler. -export([module/2, format_error/1]). --import(lists, [any/2,dropwhile/2,foldl/3,foreach/2,reverse/1]). +-import(lists, [any/2,dropwhile/2,foldl/3,map/2,foreach/2,reverse/1]). %% To be called by the compiler. @@ -413,30 +413,22 @@ valfun_1({trim,N,Remaining}, #vst{current=#st{y=Yregs0,numy=NumY}=St}=Vst) -> error({trim,N,Remaining,allocated,NumY}) end; %% Catch & try. -valfun_1({'catch',Dst,{f,Fail}}, Vst0) when Fail /= none -> - Vst = #vst{current=#st{ct=Fails}=St} = - set_type_y({catchtag,[Fail]}, Dst, Vst0), - Vst#vst{current=St#st{ct=[[Fail]|Fails]}}; -valfun_1({'try',Dst,{f,Fail}}, Vst0) -> - Vst = #vst{current=#st{ct=Fails}=St} = - set_type_y({trytag,[Fail]}, Dst, Vst0), - Vst#vst{current=St#st{ct=[[Fail]|Fails]}}; +valfun_1({'catch',Dst,{f,Fail}}, Vst) when Fail =/= none -> + init_try_catch_branch(catchtag, Dst, Fail, Vst); +valfun_1({'try',Dst,{f,Fail}}, Vst) when Fail =/= none -> + init_try_catch_branch(trytag, Dst, Fail, Vst); valfun_1({catch_end,Reg}, #vst{current=#st{ct=[Fail|Fails]}}=Vst0) -> case get_special_y_type(Reg, Vst0) of {catchtag,Fail} -> Vst = #vst{current=St} = set_catch_end(Reg, Vst0), - Xs = gb_trees_from_list([{0,term}]), - Vst#vst{current=St#st{x=Xs,ct=Fails,fls=undefined}}; + Xregs = gb_trees:enter(0, term, St#st.x), + Vst#vst{current=St#st{x=Xregs,ct=Fails,fls=undefined}}; Type -> error({bad_type,Type}) end; -valfun_1({try_end,Reg}, #vst{current=#st{ct=[Fail|Fails]}=St0}=Vst0) -> - case get_special_y_type(Reg, Vst0) of +valfun_1({try_end,Reg}, #vst{current=#st{ct=[Fail|Fails]}=St0}=Vst) -> + case get_special_y_type(Reg, Vst) of {trytag,Fail} -> - Vst = case Fail of - [FailLabel] -> branch_state(FailLabel, Vst0); - _ -> Vst0 - end, St = St0#st{ct=Fails,fls=undefined}, set_catch_end(Reg, Vst#vst{current=St}); Type -> @@ -464,14 +456,34 @@ valfun_1({get_tl,Src,Dst}, Vst) -> valfun_1({get_tuple_element,Src,I,Dst}, Vst) -> assert_type({tuple_element,I+1}, Src, Vst), set_type_reg(term, Src, Dst, Vst); +valfun_1({jump,{f,Lbl}}, Vst) -> + kill_state(branch_state(Lbl, Vst)); valfun_1(I, Vst) -> valfun_2(I, Vst). +init_try_catch_branch(Tag, Dst, Fail, Vst0) -> + Vst1 = set_type_y({Tag,[Fail]}, Dst, Vst0), + #vst{current=#st{ct=Fails}=St0} = Vst1, + CurrentSt = St0#st{ct=[[Fail]|Fails]}, + + %% Set the initial state at the try/catch label. + %% Assume that Y registers contain terms or try/catch + %% tags. + Yregs0 = map(fun({Y,uninitialized}) -> {Y,term}; + ({Y,initialized}) -> {Y,term}; + (E) -> E + end, gb_trees:to_list(CurrentSt#st.y)), + Yregs = gb_trees:from_orddict(Yregs0), + BranchSt = CurrentSt#st{y=Yregs}, + + Vst = branch_state(Fail, Vst1#vst{current=BranchSt}), + Vst#vst{current=CurrentSt}. + %% Update branched state if necessary and try next set of instructions. valfun_2(I, #vst{current=#st{ct=[]}}=Vst) -> valfun_3(I, Vst); valfun_2(I, #vst{current=#st{ct=[[Fail]|_]}}=Vst) when is_integer(Fail) -> - %% Update branched state + %% Update branched state. valfun_3(I, branch_state(Fail, Vst)); valfun_2(_, _) -> error(ambiguous_catch_try_state). @@ -587,8 +599,6 @@ valfun_4(return, #vst{current=#st{numy=none}}=Vst) -> kill_state(Vst); valfun_4(return, #vst{current=#st{numy=NumY}}) -> error({stack_frame,NumY}); -valfun_4({jump,{f,Lbl}}, Vst) -> - kill_state(branch_state(Lbl, Vst)); valfun_4({loop_rec,{f,Fail},Dst}, Vst0) -> Vst = branch_state(Fail, Vst0), %% This term may not be part of the root set until @@ -885,15 +895,7 @@ val_dsetel(_, #vst{current=#st{setelem=true}=St}=Vst) -> Vst#vst{current=St#st{setelem=false}}; val_dsetel(_, Vst) -> Vst. -kill_state(#vst{current=#st{ct=[[Fail]|_]}}=Vst) when is_integer(Fail) -> - %% There is an active catch. Make sure that we merge the state into - %% the catch label before clearing it, so that that we can be sure - %% that the label gets a state. - kill_state_1(branch_state(Fail, Vst)); kill_state(Vst) -> - kill_state_1(Vst). - -kill_state_1(Vst) -> Vst#vst{current=none}. %% A "plain" call. -- cgit v1.2.3 From 33d07550f0f1922f952a9a9ea747fcbe77a108a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 13 Apr 2018 10:44:23 +0200 Subject: beam_validator: Improve type analysis for tuples Since the compiler will start optimizing more aggressively, beam_validator must keep up and improve the recognization of tuples and maps. --- lib/compiler/src/beam_validator.erl | 105 +++++++++++++++++++++++++++++------- 1 file changed, 86 insertions(+), 19 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index c85e8f53ac..54c6d557ba 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -145,7 +145,8 @@ validate_0(Module, [{function,Name,Ar,Entry,Code}|Fs], Ft) -> fls=undefined, %Floating point state. ct=[], %List of hot catch/try labels setelem=false, %Previous instruction was setelement/3. - puts_left=none %put/1 instructions left. + puts_left=none, %put/1 instructions left. + defs=#{} %Defining expression for each register. }). -type label() :: integer(). @@ -332,7 +333,7 @@ valfun_1({bif,Op,{f,_},Src,Dst}=I, Vst) -> %% catch state). validate_src(Src, Vst), Type = bif_type(Op, Src, Vst), - set_type_reg(Type, Dst, Vst) + set_type_reg_expr(Type, I, Dst, Vst) end; %% Put instructions. valfun_1({put_list,A,B,Dst}, Vst0) -> @@ -553,12 +554,12 @@ valfun_4({call_ext_last,_,_,_}, #vst{current=#st{numy=NumY}}) -> valfun_4({make_fun2,_,_,_,Live}, Vst) -> call(make_fun, Live, Vst); %% Other BIFs -valfun_4({bif,tuple_size,{f,Fail},[Tuple],Dst}, Vst0) -> +valfun_4({bif,tuple_size,{f,Fail},[Tuple],Dst}=I, Vst0) -> TupleType0 = get_term_type(Tuple, Vst0), Vst1 = branch_state(Fail, Vst0), TupleType = upgrade_tuple_type({tuple,[0]}, TupleType0), Vst = set_type(TupleType, Tuple, Vst1), - set_type_reg({integer,[]}, Dst, Vst); + set_type_reg_expr({integer,[]}, I, Dst, Vst); valfun_4({bif,element,{f,Fail},[Pos,Tuple],Dst}, Vst0) -> TupleType0 = get_term_type(Tuple, Vst0), PosType = get_term_type(Pos, Vst0), @@ -625,10 +626,10 @@ valfun_4({set_tuple_element,Src,Tuple,I}, Vst) -> assert_type({tuple_element,I+1}, Tuple, Vst), Vst; %% Match instructions. -valfun_4({select_val,Src,{f,Fail},{list,Choices}}, Vst) -> - assert_term(Src, Vst), - Lbls = [L || {f,L} <- Choices]++[Fail], - kill_state(foldl(fun(L, S) -> branch_state(L, S) end, Vst, Lbls)); +valfun_4({select_val,Src,{f,Fail},{list,Choices}}, Vst0) -> + assert_term(Src, Vst0), + Vst = branch_state(Fail, Vst0), + kill_state(select_val_branches(Src, Choices, Vst)); valfun_4({select_tuple_arity,Tuple,{f,Fail},{list,Choices}}, Vst) -> assert_type(tuple, Tuple, Vst), kill_state(branch_arities(Choices, Tuple, branch_state(Fail, Vst))); @@ -728,6 +729,20 @@ valfun_4({test,is_map,{f,Lbl},[Src]}, Vst0) -> _ -> kill_state(Vst) end; +valfun_4({test,is_eq_exact,{f,Lbl},[Src,Val]=Ss}, Vst0) -> + validate_src(Ss, Vst0), + Infer = infer_types(Src, Vst0), + Vst1 = Infer(Val, Vst0), + Vst = branch_state(Lbl, Vst1), + case Val of + {literal,Tuple} when is_tuple(Tuple) -> + Type0 = get_term_type(Val, Vst), + Type = upgrade_tuple_type({tuple,tuple_size(Tuple)}, + Type0), + set_type(Type, Src, Vst); + _ -> + Vst + end; valfun_4({test,_Op,{f,Lbl},Src}, Vst) -> validate_src(Src, Vst), branch_state(Lbl, Vst); @@ -1022,10 +1037,14 @@ heap_alloc_2([{floats,Floats}|T], St0) -> heap_alloc_2(T, St); heap_alloc_2([], St) -> St. -prune_x_regs(Live, #vst{current=#st{x=Xs0}=St0}=Vst) when is_integer(Live) -> +prune_x_regs(Live, #vst{current=#st{x=Xs0,defs=Defs0}=St0}=Vst) + when is_integer(Live) -> Xs1 = gb_trees:to_list(Xs0), Xs = [P || {R,_}=P <- Xs1, R < Live], - St = St0#st{x=gb_trees:from_orddict(Xs)}, + Defs = maps:filter(fun({x,X}, _) -> X < Live; + ({y,_}, _) -> true + end, Defs0), + St = St0#st{x=gb_trees:from_orddict(Xs),defs=Defs}, Vst#vst{current=St}. %%% @@ -1156,6 +1175,38 @@ bsm_restore(Reg, SavePoint, Vst) -> _ -> error({illegal_restore,SavePoint,range}) end. +select_val_branches(Src, Choices, Vst) -> + Infer = infer_types(Src, Vst), + select_val_branches_1(Choices, Infer, Vst). + +select_val_branches_1([Val,{f,L}|T], Infer, Vst0) -> + Vst = branch_state(L, Infer(Val, Vst0)), + select_val_branches_1(T, Infer, Vst); +select_val_branches_1([], _, Vst) -> Vst. + +infer_types(Src, Vst) -> + case get_def(Src, Vst) of + {bif,is_map,{f,_},[Map],_} -> + fun({atom,true}, S) -> set_type_reg(map, Map, S); + (_, S) -> S + end; + {bif,tuple_size,{f,_},[Tuple],_} -> + fun({integer,Arity}, S) -> + Type0 = get_term_type(Tuple, S), + Type = upgrade_tuple_type({tuple,Arity}, Type0), + set_type(Type, Tuple, S); + (_, S) -> S + end; + {bif,'=:=',{f,_},[ArityReg,{integer,_}=Val],_} when ArityReg =/= Src -> + fun({atom,true}, S) -> + Infer = infer_types(ArityReg, S), + Infer(Val, S); + (_, S) -> S + end; + _ -> + fun(_, S) -> S end + end. + %%% %%% Keeping track of types. %%% @@ -1172,12 +1223,18 @@ set_type_reg(Type, Src, Dst, Vst) -> set_type_reg(Type, Dst, Vst) end. -set_type_reg(Type, {x,_}=Reg, Vst) -> - set_type_x(Type, Reg, Vst); set_type_reg(Type, Reg, Vst) -> - set_type_y(Type, Reg, Vst). + set_type_reg_expr(Type, none, Reg, Vst). + +set_type_reg_expr(Type, Expr, {x,_}=Reg, Vst) -> + set_type_x(Type, Expr, Reg, Vst); +set_type_reg_expr(Type, Expr, Reg, Vst) -> + set_type_y(Type, Expr, Reg, Vst). -set_type_x(Type, {x,X}=Reg, #vst{current=#st{x=Xs0}=St}=Vst) +set_type_y(Type, Reg, Vst) -> + set_type_y(Type, none, Reg, Vst). + +set_type_x(Type, Expr, {x,X}=Reg, #vst{current=#st{x=Xs0,defs=Defs0}=St}=Vst) when is_integer(X), 0 =< X -> check_limit(Reg), Xs = case gb_trees:lookup(X, Xs0) of @@ -1188,11 +1245,12 @@ set_type_x(Type, {x,X}=Reg, #vst{current=#st{x=Xs0}=St}=Vst) {value,_} -> gb_trees:update(X, Type, Xs0) end, - Vst#vst{current=St#st{x=Xs}}; -set_type_x(Type, Reg, #vst{}) -> + Defs = Defs0#{Reg=>Expr}, + Vst#vst{current=St#st{x=Xs,defs=Defs}}; +set_type_x(Type, _Expr, Reg, #vst{}) -> error({invalid_store,Reg,Type}). -set_type_y(Type, {y,Y}=Reg, #vst{current=#st{y=Ys0}=St}=Vst) +set_type_y(Type, Expr, {y,Y}=Reg, #vst{current=#st{y=Ys0,defs=Defs0}=St}=Vst) when is_integer(Y), 0 =< Y -> check_limit(Reg), Ys = case gb_trees:lookup(Y, Ys0) of @@ -1206,8 +1264,10 @@ set_type_y(Type, {y,Y}=Reg, #vst{current=#st{y=Ys0}=St}=Vst) gb_trees:update(Y, Type, Ys0) end, check_try_catch_tags(Type, Y, Ys0), - Vst#vst{current=St#st{y=Ys}}; -set_type_y(Type, Reg, #vst{}) -> error({invalid_store,Reg,Type}). + Defs = Defs0#{Reg=>Expr}, + Vst#vst{current=St#st{y=Ys,defs=Defs}}; +set_type_y(Type, _Expr, Reg, #vst{}) -> + error({invalid_store,Reg,Type}). make_fragile({fragile,_}=Type) -> Type; make_fragile(Type) -> {fragile,Type}. @@ -1419,6 +1479,8 @@ get_term_type_1({atom,A}=T, _) when is_atom(A) -> T; get_term_type_1({float,F}=T, _) when is_float(F) -> T; get_term_type_1({integer,I}=T, _) when is_integer(I) -> T; get_term_type_1({literal,Map}, _) when is_map(Map) -> map; +get_term_type_1({literal,Tuple}, _) when is_tuple(Tuple) -> + {tuple,tuple_size(Tuple)}; get_term_type_1({literal,_}=T, _) -> T; get_term_type_1({x,X}=Reg, #vst{current=#st{x=Xs}}) when is_integer(X) -> case gb_trees:lookup(X, Xs) of @@ -1433,6 +1495,11 @@ get_term_type_1({y,Y}=Reg, #vst{current=#st{y=Ys}}) when is_integer(Y) -> end; get_term_type_1(Src, _) -> error({bad_source,Src}). +get_def(Src, #vst{current=#st{defs=Defs}}) -> + case Defs of + #{Src:=Def} -> Def; + #{} -> none + end. %% get_literal(Src) -> literal_value(). get_literal(nil) -> []; -- cgit v1.2.3 From 316a794b41b2f4173b2b188ac4c91adeaa52c616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 23 Apr 2018 19:52:16 +0200 Subject: beam_validator: Don't transfer state to labels that can't be reached If we transfer state appropriately to labels that can't be reached, the state could taint other labels. --- lib/compiler/src/beam_validator.erl | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index 54c6d557ba..8b59746cb6 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -632,7 +632,12 @@ valfun_4({select_val,Src,{f,Fail},{list,Choices}}, Vst0) -> kill_state(select_val_branches(Src, Choices, Vst)); valfun_4({select_tuple_arity,Tuple,{f,Fail},{list,Choices}}, Vst) -> assert_type(tuple, Tuple, Vst), - kill_state(branch_arities(Choices, Tuple, branch_state(Fail, Vst))); + TupleType = case get_term_type(Tuple, Vst) of + {fragile,TupleType0} -> TupleType0; + TupleType0 -> TupleType0 + end, + kill_state(branch_arities(Choices, Tuple, TupleType, + branch_state(Fail, Vst))); %% New bit syntax matching instructions. valfun_4({test,bs_start_match2,{f,Fail},Live,[Ctx,NeedSlots],Ctx}, Vst0) -> @@ -1509,13 +1514,20 @@ get_literal({integer,I}) when is_integer(I) -> I; get_literal({literal,L}) -> L; get_literal(T) -> error({not_literal,T}). - -branch_arities([], _, #vst{}=Vst) -> Vst; -branch_arities([Sz,{f,L}|T], Tuple, #vst{current=St}=Vst0) - when is_integer(Sz) -> +branch_arities([Sz,{f,L}|T], Tuple, {tuple,[_]}=Type0, Vst0) when is_integer(Sz) -> Vst1 = set_type_reg({tuple,Sz}, Tuple, Vst0), Vst = branch_state(L, Vst1), - branch_arities(T, Tuple, Vst#vst{current=St}). + branch_arities(T, Tuple, Type0, Vst); +branch_arities([Sz,{f,L}|T], Tuple, {tuple,Sz}=Type, Vst0) when is_integer(Sz) -> + %% The type is already correct. (This test is redundant.) + Vst = branch_state(L, Vst0), + branch_arities(T, Tuple, Type, Vst); +branch_arities([Sz0,{f,_}|T], Tuple, {tuple,Sz}=Type, Vst) + when is_integer(Sz), Sz0 =/= Sz -> + %% We already have an established different exact size for the tuple. + %% This label can't possibly be reached. + branch_arities(T, Tuple, Type, Vst); +branch_arities([], _, _, #vst{}=Vst) -> Vst. branch_state(0, #vst{}=Vst) -> %% If the instruction fails, the stack may be scanned -- cgit v1.2.3 From 382297819ece0518a0e94b484c8ed8cb25e507c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 23 Apr 2018 11:47:21 +0200 Subject: beam_validator: Allow get_tuple_element before dsetelement --- lib/compiler/src/beam_validator.erl | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index 8b59746cb6..57b5083c1f 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -909,6 +909,8 @@ val_dsetel({set_tuple_element,_,_,_}, #vst{current=#st{setelem=false}}) -> error(illegal_context_for_set_tuple_element); val_dsetel({set_tuple_element,_,_,_}, #vst{current=#st{setelem=true}}=Vst) -> Vst; +val_dsetel({get_tuple_element,_,_,_}, Vst) -> + Vst; val_dsetel({line,_}, Vst) -> Vst; val_dsetel(_, #vst{current=#st{setelem=true}=St}=Vst) -> -- cgit v1.2.3 From 526e0474f7038f4a8a6e1ba8972b1014316058b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 22 May 2018 08:17:12 +0200 Subject: beam_validator: Strengthen validation of func_info The func_info instruction does not expect a stack frame. There will be an assertion failure in the debug-compiled runtime system. --- lib/compiler/src/beam_validator.erl | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index 57b5083c1f..1c1c02cf15 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -204,6 +204,12 @@ validate_fun_info_branches([], _, _) -> ok. validate_fun_info_branches_1(Arity, {_,_,Arity}, _) -> ok; validate_fun_info_branches_1(X, {Mod,Name,Arity}=MFA, Vst) -> try + case Vst of + #vst{current=#st{numy=none}} -> + ok; + #vst{current=#st{numy=Size}} -> + error({unexpected_stack_frame,Size}) + end, get_term_type({x,X}, Vst) catch Error -> I = {func_info,{atom,Mod},{atom,Name},Arity}, -- cgit v1.2.3 From 684d07d0e21a31a6c166dda8aa3e444d217cb9d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 4 Jun 2018 15:13:49 +0200 Subject: beam_validator: Improve merge of cons and literal list --- lib/compiler/src/beam_validator.erl | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index 1c1c02cf15..87ddd578e8 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -1656,6 +1656,12 @@ merge_types(bool, {atom,A}) -> merge_bool(A); merge_types({atom,A}, bool) -> merge_bool(A); +merge_types(cons, {literal,[_|_]}) -> + cons; +merge_types({literal,[_|_]}, cons) -> + cons; +merge_types({literal,[_|_]}, {literal,[_|_]}) -> + cons; merge_types(#ms{id=Id1,valid=B1,slots=Slots1}, #ms{id=Id2,valid=B2,slots=Slots2}) -> Id = if -- cgit v1.2.3 From 1f221b27f1336e747f7409692f260055dd3ddf79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 26 Jun 2018 06:59:40 +0200 Subject: beam_validator: Infer the types of copies in a smarter way Smarter code generation means that beam_validator must be smarter too. In the following example, beam_validator must be able to infer that y0 refers to a map: move x0 y0 test is_map L1 x0 %% Here the type for y0 must be 'map'. --- lib/compiler/src/beam_validator.erl | 114 +++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 27 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index 87ddd578e8..3ee143ab8b 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -146,7 +146,8 @@ validate_0(Module, [{function,Name,Ar,Entry,Code}|Fs], Ft) -> ct=[], %List of hot catch/try labels setelem=false, %Previous instruction was setelement/3. puts_left=none, %put/1 instructions left. - defs=#{} %Defining expression for each register. + defs=#{}, %Defining expression for each register. + aliases=#{} }). -type label() :: integer(). @@ -311,9 +312,10 @@ valfun_1({move,{y,_}=Src,{y,_}=Dst}, Vst) -> {trytag,_} -> error({trytag,Src}); Type -> set_type_reg(Type, Dst, Vst) end; -valfun_1({move,Src,Dst}, Vst) -> - Type = get_move_term_type(Src, Vst), - set_type_reg(Type, Dst, Vst); +valfun_1({move,Src,Dst}, Vst0) -> + Type = get_move_term_type(Src, Vst0), + Vst = set_type_reg(Type, Dst, Vst0), + set_alias(Src, Dst, Vst); valfun_1({fmove,Src,{fr,_}=Dst}, Vst) -> assert_type(float, Src, Vst), set_freg(Dst, Vst); @@ -415,7 +417,7 @@ valfun_1({trim,N,Remaining}, #vst{current=#st{y=Yregs0,numy=NumY}=St}=Vst) -> N =< NumY, N+Remaining =:= NumY -> Yregs1 = [{Y-N,Type} || {Y,Type} <- gb_trees:to_list(Yregs0), Y >= N], Yregs = gb_trees_from_list(Yregs1), - Vst#vst{current=St#st{y=Yregs,numy=NumY-N}}; + Vst#vst{current=St#st{y=Yregs,numy=NumY-N,aliases=#{}}}; true -> error({trim,N,Remaining,allocated,NumY}) end; @@ -429,7 +431,7 @@ valfun_1({catch_end,Reg}, #vst{current=#st{ct=[Fail|Fails]}}=Vst0) -> {catchtag,Fail} -> Vst = #vst{current=St} = set_catch_end(Reg, Vst0), Xregs = gb_trees:enter(0, term, St#st.x), - Vst#vst{current=St#st{x=Xregs,ct=Fails,fls=undefined}}; + Vst#vst{current=St#st{x=Xregs,ct=Fails,fls=undefined,aliases=#{}}}; Type -> error({bad_type,Type}) end; @@ -446,7 +448,7 @@ valfun_1({try_case,Reg}, #vst{current=#st{ct=[Fail|Fails]}}=Vst0) -> {trytag,Fail} -> Vst = #vst{current=St} = set_catch_end(Reg, Vst0), Xs = gb_trees_from_list([{0,{atom,[]}},{1,term},{2,term}]), - Vst#vst{current=St#st{x=Xs,ct=Fails,fls=undefined}}; + Vst#vst{current=St#st{x=Xs,ct=Fails,fls=undefined,aliases=#{}}}; Type -> error({bad_type,Type}) end; @@ -571,7 +573,7 @@ valfun_4({bif,element,{f,Fail},[Pos,Tuple],Dst}, Vst0) -> PosType = get_term_type(Pos, Vst0), Vst1 = branch_state(Fail, Vst0), TupleType = upgrade_tuple_type({tuple,[get_tuple_size(PosType)]}, TupleType0), - Vst = set_type(TupleType, Tuple, Vst1), + Vst = set_aliased_type(TupleType, Tuple, Vst1), set_type_reg(term, Tuple, Dst, Vst); valfun_4({bif,raise,{f,0},Src,_Dst}, Vst) -> validate_src(Src, Vst), @@ -716,16 +718,19 @@ valfun_4({test,is_float,{f,Lbl},[Float]}, Vst) -> valfun_4({test,is_tuple,{f,Lbl},[Tuple]}, Vst) -> Type0 = get_term_type(Tuple, Vst), Type = upgrade_tuple_type({tuple,[0]}, Type0), - set_type(Type, Tuple, branch_state(Lbl, Vst)); + set_aliased_type(Type, Tuple, branch_state(Lbl, Vst)); valfun_4({test,is_nonempty_list,{f,Lbl},[Cons]}, Vst) -> assert_term(Cons, Vst), - set_type(cons, Cons, branch_state(Lbl, Vst)); + Type = cons, + set_aliased_type(Type, Cons, branch_state(Lbl, Vst)); valfun_4({test,test_arity,{f,Lbl},[Tuple,Sz]}, Vst) when is_integer(Sz) -> assert_type(tuple, Tuple, Vst), - set_type_reg({tuple,Sz}, Tuple, branch_state(Lbl, Vst)); + Type = {tuple,Sz}, + set_aliased_type(Type, Tuple, branch_state(Lbl, Vst)); valfun_4({test,is_tagged_tuple,{f,Lbl},[Src,Sz,_Atom]}, Vst) -> validate_src([Src], Vst), - set_type_reg({tuple, Sz}, Src, branch_state(Lbl, Vst)); + Type = {tuple,Sz}, + set_aliased_type(Type, Src, branch_state(Lbl, Vst)); valfun_4({test,has_map_fields,{f,Lbl},Src,{list,List}}, Vst) -> assert_type(map, Src, Vst), assert_unique_map_keys(List), @@ -734,11 +739,12 @@ valfun_4({test,is_map,{f,Lbl},[Src]}, Vst0) -> Vst = branch_state(Lbl, Vst0), case Src of {Tag,_} when Tag =:= x; Tag =:= y -> - set_type_reg(map, Src, Vst); + Type = map, + set_aliased_type(Type, Src, Vst); {literal,Map} when is_map(Map) -> - Vst; + Vst0; _ -> - kill_state(Vst) + kill_state(Vst0) end; valfun_4({test,is_eq_exact,{f,Lbl},[Src,Val]=Ss}, Vst0) -> validate_src(Ss, Vst0), @@ -750,7 +756,7 @@ valfun_4({test,is_eq_exact,{f,Lbl},[Src,Val]=Ss}, Vst0) -> Type0 = get_term_type(Val, Vst), Type = upgrade_tuple_type({tuple,tuple_size(Tuple)}, Type0), - set_type(Type, Src, Vst); + set_aliased_type(Type, Src, Vst); _ -> Vst end; @@ -936,7 +942,7 @@ call(Name, Live, #vst{current=St}=Vst) -> Type when Type =/= exception -> %% Type is never 'exception' because it has been handled earlier. Xs = gb_trees_from_list([{0,Type}]), - Vst#vst{current=St#st{x=Xs,f=init_fregs()}} + Vst#vst{current=St#st{x=Xs,f=init_fregs(),aliases=#{}}} end. %% Tail call. @@ -1049,15 +1055,25 @@ heap_alloc_2([{floats,Floats}|T], St0) -> St = St0#st{hf=Floats}, heap_alloc_2(T, St); heap_alloc_2([], St) -> St. - -prune_x_regs(Live, #vst{current=#st{x=Xs0,defs=Defs0}=St0}=Vst) + +prune_x_regs(Live, #vst{current=St0}=Vst) when is_integer(Live) -> + #st{x=Xs0,defs=Defs0,aliases=Aliases0} = St0, Xs1 = gb_trees:to_list(Xs0), Xs = [P || {R,_}=P <- Xs1, R < Live], Defs = maps:filter(fun({x,X}, _) -> X < Live; ({y,_}, _) -> true end, Defs0), - St = St0#st{x=gb_trees:from_orddict(Xs),defs=Defs}, + Aliases = maps:filter(fun({x,X1}, {x,X2}) -> + X1 < Live andalso X2 < Live; + ({x,X}, _) -> + X < Live; + (_, {x,X}) -> + X < Live; + (_, _) -> + true + end, Aliases0), + St = St0#st{x=gb_trees:from_orddict(Xs),defs=Defs,aliases=Aliases}, Vst#vst{current=St}. %%% @@ -1224,6 +1240,37 @@ infer_types(Src, Vst) -> %%% Keeping track of types. %%% +set_alias(Reg1, Reg2, #vst{current=St0}=Vst) -> + case Reg1 of + {Kind,_} when Kind =:= x; Kind =:= y -> + #st{aliases=Aliases0} = St0, + Aliases = Aliases0#{Reg1=>Reg2,Reg2=>Reg1}, + St = St0#st{aliases=Aliases}, + Vst#vst{current=St}; + _ -> + Vst + end. + +set_aliased_type(Type, Reg, #vst{current=#st{aliases=Aliases}}=Vst0) -> + Vst1 = set_type(Type, Reg, Vst0), + case Aliases of + #{Reg:=OtherReg} -> + Vst = set_type_reg(Type, OtherReg, Vst1), + #vst{current=St} = Vst, + Vst#vst{current=St#st{aliases=Aliases}}; + #{} -> + Vst1 + end. + +kill_aliases(Reg, #st{aliases=Aliases0}=St) -> + case Aliases0 of + #{Reg:=OtherReg} -> + Aliases = maps:without([Reg,OtherReg], Aliases0), + St#st{aliases=Aliases}; + #{} -> + St + end. + set_type(Type, {x,_}=Reg, Vst) -> set_type_reg(Type, Reg, Vst); set_type(Type, {y,_}=Reg, Vst) -> set_type_y(Type, Reg, Vst); set_type(_, _, #vst{}=Vst) -> Vst. @@ -1247,7 +1294,7 @@ set_type_reg_expr(Type, Expr, Reg, Vst) -> set_type_y(Type, Reg, Vst) -> set_type_y(Type, none, Reg, Vst). -set_type_x(Type, Expr, {x,X}=Reg, #vst{current=#st{x=Xs0,defs=Defs0}=St}=Vst) +set_type_x(Type, Expr, {x,X}=Reg, #vst{current=#st{x=Xs0,defs=Defs0}=St0}=Vst) when is_integer(X), 0 =< X -> check_limit(Reg), Xs = case gb_trees:lookup(X, Xs0) of @@ -1259,11 +1306,12 @@ set_type_x(Type, Expr, {x,X}=Reg, #vst{current=#st{x=Xs0,defs=Defs0}=St}=Vst) gb_trees:update(X, Type, Xs0) end, Defs = Defs0#{Reg=>Expr}, + St = kill_aliases(Reg, St0), Vst#vst{current=St#st{x=Xs,defs=Defs}}; set_type_x(Type, _Expr, Reg, #vst{}) -> error({invalid_store,Reg,Type}). -set_type_y(Type, Expr, {y,Y}=Reg, #vst{current=#st{y=Ys0,defs=Defs0}=St}=Vst) +set_type_y(Type, Expr, {y,Y}=Reg, #vst{current=#st{y=Ys0,defs=Defs0}=St0}=Vst) when is_integer(Y), 0 =< Y -> check_limit(Reg), Ys = case gb_trees:lookup(Y, Ys0) of @@ -1278,6 +1326,7 @@ set_type_y(Type, Expr, {y,Y}=Reg, #vst{current=#st{y=Ys0,defs=Defs0}=St}=Vst) end, check_try_catch_tags(Type, Y, Ys0), Defs = Defs0#{Reg=>Expr}, + St = kill_aliases(Reg, St0), Vst#vst{current=St#st{y=Ys,defs=Defs}}; set_type_y(Type, _Expr, Reg, #vst{}) -> error({invalid_store,Reg,Type}). @@ -1523,7 +1572,7 @@ get_literal({literal,L}) -> L; get_literal(T) -> error({not_literal,T}). branch_arities([Sz,{f,L}|T], Tuple, {tuple,[_]}=Type0, Vst0) when is_integer(Sz) -> - Vst1 = set_type_reg({tuple,Sz}, Tuple, Vst0), + Vst1 = set_aliased_type({tuple,Sz}, Tuple, Vst0), Vst = branch_state(L, Vst1), branch_arities(T, Tuple, Type0, Vst); branch_arities([Sz,{f,L}|T], Tuple, {tuple,Sz}=Type, Vst0) when is_integer(Sz) -> @@ -1565,13 +1614,14 @@ merge_states(L, St, Branched) when L =/= 0 -> {value,OtherSt} -> merge_states_1(St, OtherSt) end. -merge_states_1(#st{x=Xs0,y=Ys0,numy=NumY0,h=H0,ct=Ct0}, - #st{x=Xs1,y=Ys1,numy=NumY1,h=H1,ct=Ct1}) -> +merge_states_1(#st{x=Xs0,y=Ys0,numy=NumY0,h=H0,ct=Ct0,aliases=Aliases0}, + #st{x=Xs1,y=Ys1,numy=NumY1,h=H1,ct=Ct1,aliases=Aliases1}) -> NumY = merge_stk(NumY0, NumY1), Xs = merge_regs(Xs0, Xs1), Ys = merge_y_regs(Ys0, Ys1), Ct = merge_ct(Ct0, Ct1), - #st{x=Xs,y=Ys,numy=NumY,h=min(H0, H1),ct=Ct}. + Aliases = merge_aliases(Aliases0, Aliases1), + #st{x=Xs,y=Ys,numy=NumY,h=min(H0, H1),ct=Ct,aliases=Aliases}. merge_stk(S, S) -> S; merge_stk(_, _) -> undecided. @@ -1680,7 +1730,17 @@ merge_bool([]) -> {atom,[]}; merge_bool(true) -> bool; merge_bool(false) -> bool; merge_bool(_) -> {atom,[]}. - + +merge_aliases(Al0, Al1) when map_size(Al0) =< map_size(Al1) -> + maps:filter(fun(K, V) -> + case Al1 of + #{K:=V} -> true; + #{} -> false + end + end, Al0); +merge_aliases(Al0, Al1) -> + merge_aliases(Al1, Al0). + verify_y_init(#vst{current=#st{y=Ys}}) -> verify_y_init_1(gb_trees:to_list(Ys)). -- cgit v1.2.3 From b4e41395494cb395fa8eb9bd771cadc712912ff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 11 Apr 2018 05:01:09 +0200 Subject: beam_except: Enhance recognition of function_clause exceptions Don't match exact BEAM instructions when trying to recognize function_clause exceptions that should be replaced with a jump to the func_info instruction. Instead, do a symbolic evaluation of the list building code and see if the result is a list of the argument registers. While at it, also teach fix_block_1/2 to completely remove a test_heap instruction before an exception generation instruction. --- lib/compiler/src/beam_except.erl | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 15 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_except.erl b/lib/compiler/src/beam_except.erl index 05c0f4fbc7..366ec6cd44 100644 --- a/lib/compiler/src/beam_except.erl +++ b/lib/compiler/src/beam_except.erl @@ -31,7 +31,7 @@ %%% erlang:error(function_clause, Args) => jump FuncInfoLabel %%% --import(lists, [reverse/1]). +-import(lists, [reverse/1,seq/2]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. @@ -138,23 +138,46 @@ fix_block(Is, Words) -> reverse(fix_block_1(Is, Words)). fix_block_1([{set,[],[],{alloc,Live,{F1,F2,Needed0,F3}}}|Is], Words) -> - Needed = Needed0 - Words, - true = Needed >= 0, %Assertion. - [{set,[],[],{alloc,Live,{F1,F2,Needed,F3}}}|Is]; + case Needed0 - Words of + 0 -> + Is; + Needed -> + true = Needed >= 0, %Assertion. + [{set,[],[],{alloc,Live,{F1,F2,Needed,F3}}}|Is] + end; fix_block_1([I|Is], Words) -> [I|fix_block_1(Is, Words)]. dig_out_block_fc([{set,[],[],{alloc,Live,_}}|Bl]) -> - case dig_out_fc(Bl, Live-1, nil) of - no -> - no; - yes -> - {yes,{function_clause,Live}} - end; + Regs = maps:from_list([{{x,X},{arg,X}} || X <- seq(0, Live-1)]), + dig_out_fc(Bl, Regs); dig_out_block_fc(_) -> no. -dig_out_fc([{set,[Dst],[{x,Reg},Dst0],put_list}|Is], Reg, Dst0) -> - dig_out_fc(Is, Reg-1, Dst); -dig_out_fc([{set,[{x,0}],[{atom,function_clause}],move}], -1, {x,1}) -> - yes; -dig_out_fc(_, _, _) -> no. +dig_out_fc([{set,[Dst],[Hd,Tl],put_list}|Is], Regs0) -> + Regs = Regs0#{Dst=>{cons,get_reg(Hd, Regs0),get_reg(Tl, Regs0)}}, + dig_out_fc(Is, Regs); +dig_out_fc([{set,[Dst],[Src],move}|Is], Regs0) -> + Regs = Regs0#{Dst=>get_reg(Src, Regs0)}, + dig_out_fc(Is, Regs); +dig_out_fc([{set,_,_,_}|_], _Regs) -> + %% Unknown instruction. It is not a function_clause error. + no; +dig_out_fc([], Regs) -> + case Regs of + #{{x,0}:={atom,function_clause},{x,1}:=Args} -> + dig_out_fc_1(Args, 0); + #{} -> + no + end. + +dig_out_fc_1({cons,{arg,I},T}, I) -> + dig_out_fc_1(T, I+1); +dig_out_fc_1(nil, I) -> + {yes,{function_clause,I}}; +dig_out_fc_1(_, _) -> no. + +get_reg(R, Regs) -> + case Regs of + #{R:=Val} -> Val; + #{} -> R + end. -- cgit v1.2.3 From ec64aa2d6f5391fa49c6a6d05713267485654ab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 17 Aug 2018 07:21:20 +0200 Subject: beam_utils: Make is_tagged_tuple a pure test This will enable more optimizations. --- lib/compiler/src/beam_utils.erl | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index 5580d2f123..989294cf0a 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -258,6 +258,7 @@ is_pure_test({test,is_ge,_,[_,_]}) -> true; is_pure_test({test,is_lt,_,[_,_]}) -> true; is_pure_test({test,is_nil,_,[_]}) -> true; is_pure_test({test,is_nonempty_list,_,[_]}) -> true; +is_pure_test({test,is_tagged_tuple,_,[_,_,_]}) -> true; is_pure_test({test,test_arity,_,[_,_]}) -> true; is_pure_test({test,has_map_fields,_,[_|_]}) -> true; is_pure_test({test,is_bitstr,_,[_]}) -> true; -- cgit v1.2.3 From 6065d9f8540cd4621cbace0cd4d485956a720fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sun, 29 Apr 2018 16:32:03 +0200 Subject: Simplify optimizations by introducing is_nil late --- lib/compiler/src/beam_a.erl | 2 ++ lib/compiler/src/beam_dead.erl | 12 ------------ lib/compiler/src/beam_utils.erl | 5 ----- lib/compiler/src/beam_z.erl | 2 ++ 4 files changed, 4 insertions(+), 17 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl index 6fd4ace540..a26f245446 100644 --- a/lib/compiler/src/beam_a.erl +++ b/lib/compiler/src/beam_a.erl @@ -113,6 +113,8 @@ rename_instr({put_map_exact,Fail,S,D,R,L}) -> {put_map,Fail,exact,S,D,R,L}; rename_instr({test,has_map_fields,Fail,Src,{list,List}}) -> {test,has_map_fields,Fail,[Src|List]}; +rename_instr({test,is_nil,Fail,[Src]}) -> + {test,is_eq_exact,Fail,[Src,nil]}; rename_instr({select_val=I,Reg,Fail,{list,List}}) -> {select,I,Reg,Fail,List}; rename_instr({select_tuple_arity=I,Reg,Fail,{list,List}}) -> diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl index efad082152..eb5652b69a 100644 --- a/lib/compiler/src/beam_dead.erl +++ b/lib/compiler/src/beam_dead.erl @@ -85,11 +85,6 @@ move_move_into_block([], Acc) -> reverse(Acc). %%% %%% or in: %%% -%%% test is_nil SomeLabel Dst -%%% move nil Dst -%%% -%%% or in: -%%% %%% select_val Register FailLabel [... Literal => L1...] %%% . %%% . @@ -140,13 +135,8 @@ forward([{test,is_eq_exact,_,[Same,Same]}|Is], D, Lc, Acc) -> forward([{test,is_eq_exact,_,[Dst,Src]}=I, {block,[{set,[Dst],[Src],move}|Bl]}|Is], D, Lc, Acc) -> forward([I,{block,Bl}|Is], D, Lc, Acc); -forward([{test,is_nil,_,[Dst]}=I, - {block,[{set,[Dst],[nil],move}|Bl]}|Is], D, Lc, Acc) -> - forward([I,{block,Bl}|Is], D, Lc, Acc); forward([{test,is_eq_exact,_,[Dst,Src]}=I,{move,Src,Dst}|Is], D, Lc, Acc) -> forward([I|Is], D, Lc, Acc); -forward([{test,is_nil,_,[Dst]}=I,{move,nil,Dst}|Is], D, Lc, Acc) -> - forward([I|Is], D, Lc, Acc); forward([{test,_,_,_}=I|Is]=Is0, D, Lc, Acc) -> %% Help the second, backward pass to by inserting labels after %% relational operators so that they can be skipped if they are @@ -900,8 +890,6 @@ normalize_op({test,is_eq_exact,{f,Fail},Ops}) -> normalize_op_1('=:=', Ops, Fail); normalize_op({test,is_ne_exact,{f,Fail},Ops}) -> normalize_op_1('=/=', Ops, Fail); -normalize_op({test,is_nil,{f,Fail},[R]}) -> - normalize_op_1('=:=', [R,nil], Fail); normalize_op({test,Op,{f,Fail},[R]}) -> case erl_internal:new_type_test(Op, 1) of true -> {{Op,R},Fail}; diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index 989294cf0a..317252e87a 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -224,16 +224,12 @@ bif_to_test('=<', [A,B], Fail) -> {test,is_ge,Fail,[B,A]}; bif_to_test('>', [A,B], Fail) -> {test,is_lt,Fail,[B,A]}; bif_to_test('<', [_,_]=Ops, Fail) -> {test,is_lt,Fail,Ops}; bif_to_test('>=', [_,_]=Ops, Fail) -> {test,is_ge,Fail,Ops}; -bif_to_test('==', [A,nil], Fail) -> {test,is_nil,Fail,[A]}; -bif_to_test('==', [nil,A], Fail) -> {test,is_nil,Fail,[A]}; bif_to_test('==', [C,A], Fail) when ?is_const(C) -> {test,is_eq,Fail,[A,C]}; bif_to_test('==', [_,_]=Ops, Fail) -> {test,is_eq,Fail,Ops}; bif_to_test('/=', [C,A], Fail) when ?is_const(C) -> {test,is_ne,Fail,[A,C]}; bif_to_test('/=', [_,_]=Ops, Fail) -> {test,is_ne,Fail,Ops}; -bif_to_test('=:=', [A,nil], Fail) -> {test,is_nil,Fail,[A]}; -bif_to_test('=:=', [nil,A], Fail) -> {test,is_nil,Fail,[A]}; bif_to_test('=:=', [C,A], Fail) when ?is_const(C) -> {test,is_eq_exact,Fail,[A,C]}; bif_to_test('=:=', [_,_]=Ops, Fail) -> {test,is_eq_exact,Fail,Ops}; @@ -256,7 +252,6 @@ is_pure_test({test,is_eq_exact,_,[_,_]}) -> true; is_pure_test({test,is_ne_exact,_,[_,_]}) -> true; is_pure_test({test,is_ge,_,[_,_]}) -> true; is_pure_test({test,is_lt,_,[_,_]}) -> true; -is_pure_test({test,is_nil,_,[_]}) -> true; is_pure_test({test,is_nonempty_list,_,[_]}) -> true; is_pure_test({test,is_tagged_tuple,_,[_,_,_]}) -> true; is_pure_test({test,test_arity,_,[_,_]}) -> true; diff --git a/lib/compiler/src/beam_z.erl b/lib/compiler/src/beam_z.erl index 1c9d762eb1..677094b3cd 100644 --- a/lib/compiler/src/beam_z.erl +++ b/lib/compiler/src/beam_z.erl @@ -110,6 +110,8 @@ undo_rename({test,has_map_fields,Fail,[Src|List]}) -> {test,has_map_fields,Fail,Src,{list,List}}; undo_rename({get_map_elements,Fail,Src,{list,List}}) -> {get_map_elements,Fail,Src,{list,List}}; +undo_rename({test,is_eq_exact,Fail,[Src,nil]}) -> + {test,is_nil,Fail,[Src]}; undo_rename({select,I,Reg,Fail,List}) -> {I,Reg,Fail,{list,List}}; undo_rename(I) -> I. -- cgit v1.2.3 From 3fc40fd57fa01b097b4c363860c4d4762e13db8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 1 Mar 2018 06:42:57 +0100 Subject: Don't run unsafe compiler passes As a preparation for replacing v3_codegen with a new code generator, remove unsafe optimization passes. Especially the older compiler passes have implicit assumptions about how the code is generated. Remove the optimizations in beam_block (keep the code that creates blocks) because they are unsafe. beam_block also calls beam_utils:live_opt/1, which is unsafe. Remove beam_type because it calls beam_utils:live_opt/1, and also because it recalculates the number of heaps words and number of live registers in allocation instructions, thus potentially hiding bugs in other passes. Remove beam_receive because it is unsafe. Remove beam_record because it is the only remaining user of beam_utils:anno_defs/1. Remove beam_reorder because it makes much more sense to run it as an early SSA-based optimization pass. Remove the now unused functions in beam_utils: anno_def/1 delete_annos/1 is_killed_block/2 live_opt/1 usage/3 Note that the following test cases will fail because of the removed optimizations: compile_SUITE:optimized_guards/1 compile_SUITE:bc_options/1 receive_SUITE:ref_opt/1 --- lib/compiler/src/Makefile | 4 - lib/compiler/src/beam_block.erl | 574 +----------------------------------- lib/compiler/src/beam_utils.erl | 561 +---------------------------------- lib/compiler/src/compile.erl | 20 +- lib/compiler/src/compiler.app.src | 4 - lib/compiler/test/compile_SUITE.erl | 1 - lib/compiler/test/misc_SUITE.erl | 44 --- 7 files changed, 10 insertions(+), 1198 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile index 2408c76b48..08042aeba4 100644 --- a/lib/compiler/src/Makefile +++ b/lib/compiler/src/Makefile @@ -61,12 +61,8 @@ MODULES = \ beam_listing \ beam_opcodes \ beam_peep \ - beam_receive \ - beam_reorder \ - beam_record \ beam_split \ beam_trim \ - beam_type \ beam_utils \ beam_validator \ beam_z \ diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index fe43163455..3e28ff2c01 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -23,31 +23,20 @@ -module(beam_block). -export([module/2]). --import(lists, [reverse/1,reverse/2,member/2]). +-import(lists, [reverse/1]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. -module({Mod,Exp,Attr,Fs0,Lc}, Opts) -> - Blockify = not member(no_blockify, Opts), - Fs = [function(F, Blockify) || F <- Fs0], +module({Mod,Exp,Attr,Fs0,Lc}, _Opts) -> + Fs = [function(F) || F <- Fs0], {ok,{Mod,Exp,Attr,Fs,Lc}}. -function({function,Name,Arity,CLabel,Is0}, Blockify) -> +function({function,Name,Arity,CLabel,Is0}) -> try %% Collect basic blocks and optimize them. - Is1 = case Blockify of - false -> Is0; - true -> blockify(Is0) - end, - Is2 = embed_lines(Is1), - Is3 = local_cse(Is2), - Is4 = beam_utils:anno_defs(Is3), - Is5 = move_allocates(Is4), - Is6 = beam_utils:live_opt(Is5), - Is7 = opt_blocks(Is6), - Is8 = beam_utils:delete_annos(Is7), - Is = opt_allocs(Is8), + Is1 = blockify(Is0), + Is = embed_lines(Is1), %% Done. {function,Name,Arity,CLabel,Is} @@ -137,557 +126,6 @@ embed_lines([{block,B2},{line,_}=Line,{block,B1}|T], Acc) -> embed_lines([{block,B1},{line,_}=Line|T], Acc) -> B = {block,[{set,[],[],Line}|B1]}, embed_lines([B|T], Acc); -embed_lines([{block,B2},{block,B1}|T], Acc) -> - %% This can only happen when beam_block is run for - %% the second time. - B = {block,B1++B2}, - embed_lines([B|T], Acc); embed_lines([I|Is], Acc) -> embed_lines(Is, [I|Acc]); embed_lines([], Acc) -> Acc. - -opt_blocks([{block,Bl0}|Is]) -> - %% The live annotation at the beginning is not useful. - [{'%anno',_}|Bl] = Bl0, - [{block,opt_block(Bl)}|opt_blocks(Is)]; -opt_blocks([I|Is]) -> - [I|opt_blocks(Is)]; -opt_blocks([]) -> []. - -opt_block(Is0) -> - find_fixpoint(fun(Is) -> - opt_tuple_element(opt(Is)) - end, Is0). - -find_fixpoint(OptFun, Is0) -> - case OptFun(Is0) of - Is0 -> Is0; - Is1 -> find_fixpoint(OptFun, Is1) - end. - -%% move_allocates(Is0) -> Is -%% Move allocate instructions upwards in the instruction stream -%% (within the same block), in the hope of getting more possibilities -%% for optimizing away moves later. -%% -%% For example, we can transform the following instructions: -%% -%% get_tuple_element x(1) Element => x(2) -%% allocate_zero StackSize 3 %% x(0), x(1), x(2) are live -%% -%% to the following instructions: -%% -%% allocate_zero StackSize 2 %% x(0) and x(1) are live -%% get_tuple_element x(1) Element => x(2) -%% -%% NOTE: Since the beam_reorder pass has been run, it is no longer -%% safe to assume that if x(N) is initialized, then all lower-numbered -%% x registers are also initialized. -%% -%% For example, we must be careful when transforming the following -%% instructions: -%% -%% get_tuple_element x(0) Element => x(1) -%% allocate_zero StackSize 3 %x(0), x(1), x(2) are live -%% -%% to the following instructions: -%% -%% allocate_zero StackSize 3 -%% get_tuple_element x(0) Element => x(1) -%% -%% The transformation is safe if and only if x(1) has been -%% initialized previously. We will use the annotations added by -%% beam_utils:anno_defs/1 to determine whether x(a) has been -%% initialized. - -move_allocates([{block,Bl0}|Is]) -> - Bl = move_allocates_1(reverse(Bl0), []), - [{block,Bl}|move_allocates(Is)]; -move_allocates([I|Is]) -> - [I|move_allocates(Is)]; -move_allocates([]) -> []. - -move_allocates_1([{'%anno',_}|Is], Acc) -> - move_allocates_1(Is, Acc); -move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info0}}|Acc]=Acc0) -> - case alloc_may_pass(I) of - false -> - move_allocates_1(Is, [I|Acc0]); - true -> - case alloc_live_regs(I, Is, Live0) of - not_possible -> - move_allocates_1(Is, [I|Acc0]); - Live when is_integer(Live) -> - Info = safe_info(Info0), - A = {set,[],[],{alloc,Live,Info}}, - move_allocates_1(Is, [A,I|Acc]) - end - end; -move_allocates_1([I|Is], Acc) -> - move_allocates_1(Is, [I|Acc]); -move_allocates_1([], Acc) -> Acc. - -alloc_may_pass({set,_,[{fr,_}],fmove}) -> false; -alloc_may_pass({set,_,_,{alloc,_,_}}) -> false; -alloc_may_pass({set,_,_,{set_tuple_element,_}}) -> false; -alloc_may_pass({set,_,_,put_list}) -> false; -alloc_may_pass({set,_,_,put}) -> false; -alloc_may_pass({set,_,_,_}) -> true. - -safe_info({nozero,Stack,Heap,_}) -> - %% nozero is not safe if the allocation instruction is moved - %% upwards past an instruction that may throw an exception - %% (such as element/2). - {zero,Stack,Heap,[]}; -safe_info(Info) -> Info. - -%% opt([Instruction]) -> [Instruction] -%% Optimize the instruction stream inside a basic block. - -opt([{set,[X],[X],move}|Is]) -> opt(Is); -opt([{set,[Dst],_,move},{set,[Dst],[Src],move}=I|Is]) when Dst =/= Src -> - opt([I|Is]); -opt([{set,[{x,0}],[S1],move}=I1,{set,[D2],[{x,0}],move}|Is]) -> - opt([I1,{set,[D2],[S1],move}|Is]); -opt([{set,[{x,0}],[S1],move}=I1,{set,[D2],[S2],move}|Is0]) when S1 =/= D2 -> - %% Place move S x0 at the end of move sequences so that - %% loader can merge with the following instruction - {Ds,Is} = opt_moves([D2], Is0), - [{set,Ds,[S2],move}|opt([I1|Is])]; -opt([{set,_,_,{line,_}}=Line1, - {set,[D1],[{integer,Idx1},Reg],{bif,element,{f,0}}}=I1, - {set,_,_,{line,_}}=Line2, - {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,0}}}=I2|Is]) - when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg -> - opt([Line2,I2,Line1,I1|Is]); -opt([{set,[D1],[{integer,Idx1},Reg],{bif,element,{f,L}}}=I1, - {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,L}}}=I2|Is]) - when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg -> - opt([I2,I1|Is]); -opt([{set,Hd0,Cons,get_hd}=GetHd, - {set,Tl0,Cons,get_tl}=GetTl|Is0]) -> - case {opt_moves(Hd0, [GetTl|Is0]),opt_moves(Tl0, [GetHd|Is0])} of - {{Hd0,Is},{Tl0,_}} -> - [GetHd|opt(Is)]; - {{Hd,Is},{Tl0,_}} -> - [{set,Hd,Cons,get_hd}|opt(Is)]; - {{_,_},{Tl,Is}} -> - [{set,Tl,Cons,get_tl}|opt(Is)] - end; -opt([{set,Ds0,Ss,Op}|Is0]) -> - {Ds,Is} = opt_moves(Ds0, Is0), - [{set,Ds,Ss,Op}|opt(Is)]; -opt([{'%anno',_}=I|Is]) -> - [I|opt(Is)]; -opt([]) -> []. - -%% opt_moves([Dest], [Instruction]) -> {[Dest],[Instruction]} -%% For each Dest, does the optimization described in opt_move/2. - -opt_moves([], Is0) -> {[],Is0}; -opt_moves([D0]=Ds, Is0) -> - case opt_move(D0, Is0) of - not_possible -> {Ds,Is0}; - {D1,Is} -> {[D1],Is} - end. - -%% opt_move(Dest, [Instruction]) -> {UpdatedDest,[Instruction]} | not_possible -%% If there is a {move,Dest,FinalDest} instruction -%% in the instruction stream, remove the move instruction -%% and let FinalDest be the destination. - -opt_move(Dest, Is) -> - opt_move_1(Dest, Is, []). - -opt_move_1(R, [{set,[D],[R],move}|Is0], Acc) -> - %% Provided that the source register is killed by instructions - %% that follow, the optimization is safe. - case eliminate_use_of_from_reg(Is0, R, D) of - {yes,Is} -> opt_move_rev(D, Acc, Is); - no -> not_possible - end; -opt_move_1(_R, [{set,_,_,{alloc,_,_}}|_], _) -> - %% The optimization is either not possible or not safe. - %% - %% If R is an X register killed by allocation, the optimization is - %% not safe. On the other hand, if the X register is killed, there - %% will not follow a 'move' instruction with this X register as - %% the source. - %% - %% If R is a Y register, the optimization is still not safe - %% because the new target register is an X register that cannot - %% safely pass the alloc instruction. - not_possible; -opt_move_1(R, [{set,_,_,_}=I|Is], Acc) -> - %% If the source register is either killed or used by this - %% instruction, the optimimization is not possible. - case is_killed_or_used(R, I) of - true -> not_possible; - false -> opt_move_1(R, Is, [I|Acc]) - end; -opt_move_1(_, _, _) -> - not_possible. - -%% opt_tuple_element([Instruction]) -> [Instruction] -%% If possible, move get_tuple_element instructions forward -%% in the instruction stream to a move instruction, eliminating -%% the move instruction. Example: -%% -%% get_tuple_element Tuple Pos Dst1 -%% ... -%% move Dst1 Dst2 -%% -%% This code may be possible to rewrite to: -%% -%% %%(Moved get_tuple_element instruction) -%% ... -%% get_tuple_element Tuple Pos Dst2 -%% - -opt_tuple_element([{set,[D],[S],{get_tuple_element,_}}=I|Is0]) -> - case opt_tuple_element_1(Is0, I, {S,D}, []) of - no -> - [I|opt_tuple_element(Is0)]; - {yes,Is} -> - opt_tuple_element(Is) - end; -opt_tuple_element([I|Is]) -> - [I|opt_tuple_element(Is)]; -opt_tuple_element([]) -> []. - -opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) -> - no; -opt_tuple_element_1([{set,_,_,{try_catch,_,_}}|_], _, _, _) -> - no; -opt_tuple_element_1([{set,[D],[S],move}|Is0], I0, {_,S}, Acc) -> - case eliminate_use_of_from_reg(Is0, S, D) of - no -> - no; - {yes,Is1} -> - {set,[S],Ss,Op} = I0, - I = {set,[D],Ss,Op}, - case opt_move_rev(S, Acc, [I|Is1]) of - not_possible -> - %% Not safe because the move of the - %% get_tuple_element instruction would cause the - %% result of a previous instruction to be ignored. - no; - {_,Is} -> - {yes,Is} - end - end; -opt_tuple_element_1([{set,Ds,Ss,_}=I|Is], MovedI, {S,D}=Regs, Acc) -> - case member(S, Ds) orelse member(D, Ss) of - true -> - no; - false -> - opt_tuple_element_1(Is, MovedI, Regs, [I|Acc]) - end; -opt_tuple_element_1(_, _, _, _) -> no. - -%% Reverse the instructions, while checking that there are no -%% instructions that would interfere with using the new destination -%% register (D). - -opt_move_rev(D, [I|Is], Acc) -> - case is_killed_or_used(D, I) of - true -> not_possible; - false -> opt_move_rev(D, Is, [I|Acc]) - end; -opt_move_rev(D, [], Acc) -> {D,Acc}. - -%% is_killed_or_used(Register, {set,_,_,_}) -> bool() -%% Test whether the register is used by the instruction. - -is_killed_or_used(R, {set,Ss,Ds,_}) -> - member(R, Ds) orelse member(R, Ss). - -%% eliminate_use_of_from_reg([Instruction], FromRegister, ToRegister, Acc) -> -%% {yes,Is} | no -%% Eliminate any use of FromRegister in the instruction sequence -%% by replacing uses of FromRegister with ToRegister. If FromRegister -%% is referenced by an allocation instruction, return 'no' to indicate -%% that FromRegister is still used and that the optimization is not -%% possible. - -eliminate_use_of_from_reg(Is, From, To) -> - try - eliminate_use_of_from_reg(Is, From, To, []) - catch - throw:not_possible -> - no - end. - -eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) -> - if - X < Live -> - no; - true -> - {yes,reverse(Acc, Is0)} - end; -eliminate_use_of_from_reg([{set,Ds,Ss0,Op}=I0|Is], From, To, Acc) -> - ensure_safe_tuple(I0, To), - I = case member(From, Ss0) of - true -> - Ss = [case S of - From -> To; - _ -> S - end || S <- Ss0], - {set,Ds,Ss,Op}; - false -> - I0 - end, - case member(From, Ds) of - true -> - {yes,reverse(Acc, [I|Is])}; - false -> - case member(To, Ds) of - true -> - case beam_utils:is_killed_block(From, Is) of - true -> - {yes,reverse(Acc, [I|Is])}; - false -> - no - end; - false -> - eliminate_use_of_from_reg(Is, From, To, [I|Acc]) - end - end; -eliminate_use_of_from_reg([I]=Is, From, _To, Acc) -> - case beam_utils:is_killed_block(From, [I]) of - true -> - {yes,reverse(Acc, Is)}; - false -> - no - end. - -ensure_safe_tuple({set,[To],[],{put_tuple,_}}, To) -> - throw(not_possible); -ensure_safe_tuple(_, _) -> ok. - -%% opt_allocs(Instructions) -> Instructions. Optimize allocate -%% instructions inside blocks. If safe, replace an allocate_zero -%% instruction with the slightly cheaper allocate instruction. - -opt_allocs(Is) -> - D = beam_utils:index_labels(Is), - opt_allocs_1(Is, D). - -opt_allocs_1([{block,Bl0}|Is], D) -> - Bl = opt_alloc(Bl0, {D,Is}), - [{block,Bl}|opt_allocs_1(Is, D)]; -opt_allocs_1([I|Is], D) -> - [I|opt_allocs_1(Is, D)]; -opt_allocs_1([], _) -> []. - -%% opt_alloc(Instructions) -> Instructions' -%% Optimises all allocate instructions. - -opt_alloc([{set,[],[],{alloc,Live0,Info0}}, - {set,[],[],{alloc,Live,Info}}|Is], D) -> - Live = Live0, %Assertion. - Alloc = combine_alloc(Info0, Info), - I = {set,[],[],{alloc,Live,Alloc}}, - opt_alloc([I|Is], D); -opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is], D) -> - [{set,[],[],opt_alloc(Is, D, Ns, Nh, R)}|Is]; -opt_alloc([I|Is], D) -> [I|opt_alloc(Is, D)]; -opt_alloc([], _) -> []. - -combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]}) -> - {zero,Ns,beam_utils:combine_heap_needs(Nh1, Nh2),Init}. - -%% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> [Instr] -%% Generates the optimal sequence of instructions for -%% allocating and initalizing the stack frame and needed heap. - -opt_alloc(_Is, _D, nostack, Nh, LivingRegs) -> - {alloc,LivingRegs,{nozero,nostack,Nh,[]}}; -opt_alloc(Bl, {D,OuterIs}, Ns, Nh, LivingRegs) -> - Is = [{block,Bl}|OuterIs], - InitRegs = init_yregs(Ns, Is, D), - case count_ones(InitRegs) of - N when N*2 > Ns -> - {alloc,LivingRegs,{nozero,Ns,Nh,gen_init(Ns, InitRegs)}}; - _ -> - {alloc,LivingRegs,{zero,Ns,Nh,[]}} - end. - -gen_init(Fs, Regs) -> gen_init(Fs, Regs, 0, []). - -gen_init(SameFs, _Regs, SameFs, Acc) -> reverse(Acc); -gen_init(Fs, Regs, Y, Acc) when Regs band 1 =:= 0 -> - gen_init(Fs, Regs bsr 1, Y+1, [{init,{y,Y}}|Acc]); -gen_init(Fs, Regs, Y, Acc) -> - gen_init(Fs, Regs bsr 1, Y+1, Acc). - -init_yregs(Y, Is, D) when Y >= 0 -> - case beam_utils:is_killed({y,Y}, Is, D) of - true -> - (1 bsl Y) bor init_yregs(Y-1, Is, D); - false -> - init_yregs(Y-1, Is, D) - end; -init_yregs(_, _, _) -> 0. - -count_ones(Bits) -> count_ones(Bits, 0). -count_ones(0, Acc) -> Acc; -count_ones(Bits, Acc) -> - count_ones(Bits bsr 1, Acc + (Bits band 1)). - -%% Calculate the new number of live registers when we move an allocate -%% instruction upwards, passing a 'set' instruction. - -alloc_live_regs({set,Ds,Ss,_}, Is, Regs0) -> - Rset = x_live(Ss, x_dead(Ds, (1 bsl Regs0)-1)), - Live = live_regs(0, Rset), - case ensure_contiguous(Rset, Live) of - not_possible -> - %% Liveness information (looking forward in the - %% instruction stream) can't prove that moving this - %% allocation instruction is safe. Now use the annotation - %% of defined registers at the beginning of the current - %% block to see whether moving would be safe. - Def0 = defined_regs(Is, 0), - Def = Def0 band ((1 bsl Live) - 1), - ensure_contiguous(Rset bor Def, Live); - Live -> - %% Safe based on liveness information. - Live - end. - -live_regs(N, 0) -> - N; -live_regs(N, Regs) -> - live_regs(N+1, Regs bsr 1). - -ensure_contiguous(Regs, Live) -> - case (1 bsl Live) - 1 of - Regs -> Live; - _ -> not_possible - end. - -x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N))); -x_dead([_|Rs], Regs) -> x_dead(Rs, Regs); -x_dead([], Regs) -> Regs. - -x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N)); -x_live([_|Rs], Regs) -> x_live(Rs, Regs); -x_live([], Regs) -> Regs. - -%% defined_regs(ReversedInstructions) -> RegBitmap. -%% Given a reversed instruction stream, determine the -%% the registers that are defined. - -defined_regs([{'%anno',{def,Def}}|_], Regs) -> - Def bor Regs; -defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) -> - x_live(Ds, Regs bor ((1 bsl Live) - 1)); -defined_regs([{set,Ds,_,_}|Is], Regs) -> - defined_regs(Is, x_live(Ds, Regs)). - -%%% -%%% Do local common sub expression elimination (CSE) in each block. -%%% - -local_cse([{block,Bl0}|Is]) -> - Bl = cse_block(Bl0, orddict:new(), []), - [{block,Bl}|local_cse(Is)]; -local_cse([I|Is]) -> - [I|local_cse(Is)]; -local_cse([]) -> []. - -cse_block([I|Is], Es0, Acc0) -> - Es1 = cse_clear(I, Es0), - case cse_expr(I) of - none -> - %% Instruction is not suitable for CSE. - cse_block(Is, Es1, [I|Acc0]); - {ok,D,Expr} -> - %% Suitable instruction. First update the dictionary of - %% suitable expressions for the next iteration. - Es = cse_add(D, Expr, Es1), - - %% Search for a previous identical expression. - case cse_find(Expr, Es0) of - error -> - %% Nothing found - cse_block(Is, Es, [I|Acc0]); - Src -> - %% Use the previously calculated result. - %% Also eliminate any line instruction. - Move = {set,[D],[Src],move}, - case Acc0 of - [{set,_,_,{line,_}}|Acc] -> - cse_block(Is, Es, [Move|Acc]); - [_|_] -> - cse_block(Is, Es, [Move|Acc0]) - end - end - end; -cse_block([], _, Acc) -> - reverse(Acc). - -%% cse_find(Expr, Expressions) -> error | Register. -%% Find a previously evaluated expression whose result can be reused, -%% or return 'error' if no such expression is found. - -cse_find(Expr, Es) -> - case orddict:find(Expr, Es) of - {ok,{Src,_}} -> Src; - error -> error - end. - -cse_expr({set,[D],Ss,{bif,N,_}}) -> - case D of - {fr,_} -> - %% There are too many things that can go wrong. - none; - _ -> - {ok,D,{{bif,N},Ss}} - end; -cse_expr({set,[D],Ss,{alloc,_,{gc_bif,N,_}}}) -> - {ok,D,{{gc_bif,N},Ss}}; -cse_expr({set,[D],Ss,put_list}) -> - {ok,D,{put_list,Ss}}; -cse_expr(_) -> none. - -%% cse_clear(Instr, Expressions0) -> Expressions. -%% Remove all previous expressions that will become -%% invalid when this instruction is executed. Basically, -%% an expression is no longer safe to reuse when the -%% register it has been stored to has been modified, killed, -%% or if any of the source operands have changed. - -cse_clear({set,Ds,_,{alloc,Live,_}}, Es) -> - cse_clear_1(Es, Live, Ds); -cse_clear({set,Ds,_,_}, Es) -> - cse_clear_1(Es, all, Ds). - -cse_clear_1(Es, Live, Ds0) -> - Ds = ordsets:from_list(Ds0), - [E || E <- Es, cse_is_safe(E, Live, Ds)]. - -cse_is_safe({_,{Dst,Interfering}}, Live, Ds) -> - ordsets:is_disjoint(Interfering, Ds) andalso - case Dst of - {x,X} -> - X < Live; - _ -> - true - end. - -%% cse_add(Dest, Expr, Expressions0) -> Expressions. -%% Provided that it is safe, add a new expression to the dictionary -%% of already evaluated expressions. - -cse_add(D, {_,Ss}=Expr, Es) -> - case member(D, Ss) of - false -> - Interfering = ordsets:from_list([D|Ss]), - orddict:store(Expr, {D,Interfering}, Es); - true -> - %% Unsafe because the instruction overwrites one of - %% source operands. - Es - end. diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index 317252e87a..af1e719206 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -21,18 +21,16 @@ %% -module(beam_utils). --export([is_killed_block/2,is_killed/3,is_killed_at/3, - is_not_used/3,usage/3, +-export([is_killed/3,is_killed_at/3,is_not_used/3, empty_label_index/0,index_label/3,index_labels/1,replace_labels/4, code_at/2,bif_to_test/3,is_pure_test/1, - live_opt/1,delete_annos/1,combine_heap_needs/2, - anno_defs/1, + combine_heap_needs/2, split_even/1 ]). -export_type([code_index/0,module_code/0,instruction/0]). --import(lists, [flatmap/2,map/2,member/2,sort/1,reverse/1,splitwith/2]). +-import(lists, [flatmap/2,map/2,member/2,sort/1,reverse/1]). -define(is_const(Val), (Val =:= nil orelse element(1, Val) =:= integer orelse @@ -62,46 +60,6 @@ {lbl :: code_index(), %Label to code index. res :: result_cache()}). %Result cache for each label. -%% usage(Register, [Instruction], State) -> killed|not_used|used. -%% Determine the usage of Register in the instruction sequence. -%% The return value is one of: -%% -%% killed - The register is not used in any way. -%% not_used - The register is referenced only by an allocating instruction -%% (the actual value does not matter). -%% used - The register is used (its value do matter). - --spec usage(beam_asm:reg(), [instruction()], code_index()) -> - 'killed' | 'not_used' | 'used'. - -usage(R, Is, D) -> - St = #live{lbl=D,res=gb_trees:empty()}, - {Usage,_} = check_liveness(R, Is, St), - Usage. - - -%% is_killed_block(Register, [Instruction]) -> true|false -%% Determine whether a register is killed by the instruction sequence inside -%% a block. -%% -%% If true is returned, it means that the register will not be -%% referenced in ANY way (not even indirectly by an allocate instruction); -%% i.e. it is OK to enter the instruction sequence with Register -%% containing garbage. - --spec is_killed_block(beam_asm:reg(), [instruction()]) -> boolean(). - -is_killed_block({x,X}, [{set,_,_,{alloc,Live,_}}|_]) -> - X >= Live; -is_killed_block(R, [{set,Ds,Ss,_Op}|Is]) -> - not member(R, Ss) andalso (member(R, Ds) orelse is_killed_block(R, Is)); -is_killed_block(R, [{'%anno',{used,Regs}}|Is]) -> - case R of - {x,X} when (Regs bsr X) band 1 =:= 0 -> true; - _ -> is_killed_block(R, Is) - end; -is_killed_block(_, []) -> false. - %% is_killed(Register, [Instruction], State) -> true|false %% Determine whether a register is killed by the instruction sequence. %% If true is returned, it means that the register will not be @@ -261,42 +219,6 @@ is_pure_test({test,is_function2,_,[_,_]}) -> true; is_pure_test({test,Op,_,Ops}) -> erl_internal:new_type_test(Op, length(Ops)). - -%% live_opt([Instruction]) -> [Instruction]. -%% Go through the instruction sequence in reverse execution -%% order, keep track of liveness and remove 'move' instructions -%% whose destination is a register that will not be used. -%% Also insert {used,Regs} annotations at the beginning -%% and end of each block. - --spec live_opt([instruction()]) -> [instruction()]. - -live_opt(Is0) -> - {[{label,Fail}|_]=Bef,[Fi|Is]} = - splitwith(fun({func_info,_,_,_}) -> false; - (_) -> true - end, Is0), - {func_info,_,_,Live} = Fi, - D = gb_trees:insert(Fail, live_call(Live), gb_trees:empty()), - Bef ++ [Fi|live_opt(reverse(Is), 0, D, [])]. - - -%% delete_annos([Instruction]) -> [Instruction]. -%% Delete all annotations. - --spec delete_annos([instruction()]) -> [instruction()]. - -delete_annos([{block,Bl0}|Is]) -> - case delete_annos(Bl0) of - [] -> delete_annos(Is); - [_|_]=Bl -> [{block,Bl}|delete_annos(Is)] - end; -delete_annos([{'%anno',_}|Is]) -> - delete_annos(Is); -delete_annos([I|Is]) -> - [I|delete_annos(Is)]; -delete_annos([]) -> []. - %% combine_heap_needs(HeapNeed1, HeapNeed2) -> HeapNeed %% Combine the heap need for two allocation instructions. @@ -310,24 +232,6 @@ combine_heap_needs(H1, H2) when is_integer(H1), is_integer(H2) -> combine_heap_needs(H1, H2) -> {alloc,combine_alloc_lists([H1,H2])}. - -%% anno_defs(Instructions) -> Instructions' -%% Add {def,RegisterBitmap} annotations to the beginning of -%% each block. Iff bit X is set in the the bitmap, it means -%% that {x,X} is defined when the block is entered. - --spec anno_defs([instruction()]) -> [instruction()]. - -anno_defs(Is0) -> - {Bef,[Fi|Is1]} = - splitwith(fun({func_info,_,_,_}) -> false; - (_) -> true - end, Is0), - {func_info,_,_,Arity} = Fi, - Regs = init_def_regs(Arity), - Is = defs(Is1, Regs, #{}), - Bef ++ [Fi|Is]. - %% split_even/1 %% [1,2,3,4,5,6] -> {[1,3,5],[2,4,6]} @@ -846,466 +750,7 @@ combine_alloc_lists(Al0) -> %% live_opt/4. -%% Bit syntax instructions. -live_opt([{bs_context_to_binary,Src}=I|Is], Regs0, D, Acc) -> - Regs = x_live([Src], Regs0), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_init,Fail,_,none,Ss,Dst}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live(Ss, x_dead([Dst], Regs0)), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_init,Fail,Info,Live0,Ss,Dst}|Is], Regs0, D, Acc) -> - Regs1 = x_dead([Dst], Regs0), - Live = live_regs(Regs1), - true = Live =< Live0, %Assertion. - Regs2 = live_call(Live), - Regs3 = x_live(Ss, Regs2), - Regs = live_join_label(Fail, D, Regs3), - I = {bs_init,Fail,Info,Live,Ss,Dst}, - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_put,Fail,_,Ss}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live(Ss, Regs0), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_restore2,Src,_}=I|Is], Regs0, D, Acc) -> - Regs = x_live([Src], Regs0), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_save2,Src,_}=I|Is], Regs0, D, Acc) -> - Regs = x_live([Src], Regs0), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{test,bs_start_match2,Fail,Live,[Src,_],_}=I|Is], _, D, Acc) -> - Regs0 = live_call(Live), - Regs1 = x_live([Src], Regs0), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); - -%% Other instructions. -live_opt([{block,Bl0}|Is], Regs0, D, Acc) -> - Live0 = make_anno({used,Regs0}), - {Bl,Regs} = live_opt_block(reverse(Bl0), Regs0, D, [Live0]), - Live = make_anno({used,Regs}), - live_opt(Is, Regs, D, [{block,[Live|Bl]}|Acc]); -live_opt([build_stacktrace=I|Is], _, D, Acc) -> - live_opt(Is, live_call(1), D, [I|Acc]); -live_opt([raw_raise=I|Is], _, D, Acc) -> - live_opt(Is, live_call(3), D, [I|Acc]); -live_opt([{label,L}=I|Is], Regs, D0, Acc) -> - D = gb_trees:insert(L, Regs, D0), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{jump,{f,L}}=I|Is], _, D, Acc) -> - Regs = gb_trees:get(L, D), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([return=I|Is], _, D, Acc) -> - live_opt(Is, 1, D, [I|Acc]); -live_opt([{catch_end,_}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(1), D, [I|Acc]); -live_opt([{badmatch,Src}=I|Is], _, D, Acc) -> - Regs = x_live([Src], 0), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{case_end,Src}=I|Is], _, D, Acc) -> - Regs = x_live([Src], 0), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{try_case_end,Src}=I|Is], _, D, Acc) -> - Regs = x_live([Src], 0), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([if_end=I|Is], _, D, Acc) -> - Regs = 0, - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{call,Arity,_}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(Arity), D, [I|Acc]); -live_opt([{call_ext,Arity,_}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(Arity), D, [I|Acc]); -live_opt([{call_fun,Arity}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(Arity+1), D, [I|Acc]); -live_opt([{apply,Arity}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(Arity+2), D, [I|Acc]); -live_opt([{make_fun2,_,_,_,Arity}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(Arity), D, [I|Acc]); -live_opt([{test,_,Fail,Ss}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live(Ss, Regs0), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{test,_,Fail,Live,Ss,_}=I|Is], _, D, Acc) -> - Regs0 = live_call(Live), - Regs1 = x_live(Ss, Regs0), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{select,_,Src,Fail,List}=I|Is], _, D, Acc) -> - Regs0 = 0, - Regs1 = x_live([Src], Regs0), - Regs = live_join_labels([Fail|List], D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{try_case,Y}=I|Is], Regs0, D, Acc) -> - Regs = live_call(1), - case Regs0 of - 0 -> - live_opt(Is, Regs, D, [{try_end,Y}|Acc]); - _ -> - live_opt(Is, live_call(1), D, [I|Acc]) - end; -live_opt([{loop_rec,_Fail,_Dst}=I|Is], _, D, Acc) -> - live_opt(Is, 0, D, [I|Acc]); -live_opt([timeout=I|Is], _, D, Acc) -> - live_opt(Is, 0, D, [I|Acc]); -live_opt([{wait,_}=I|Is], _, D, Acc) -> - live_opt(Is, 0, D, [I|Acc]); -live_opt([{get_map_elements,Fail,Src,{list,List}}=I|Is], Regs0, D, Acc) -> - {Ss,Ds} = split_even(List), - Regs1 = x_live([Src|Ss], x_dead(Ds, Regs0)), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{gc_bif,N,F,R,As,Dst}=I|Is], Regs0, D, Acc) -> - Bl = [{set,[Dst],As,{alloc,R,{gc_bif,N,F}}}], - {_,Regs} = live_opt_block(Bl, Regs0, D, []), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bif,N,F,As,Dst}=I|Is], Regs0, D, Acc) -> - Bl = [{set,[Dst],As,{bif,N,F}}], - {_,Regs} = live_opt_block(Bl, Regs0, D, []), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{get_tuple_element,Src,Idx,Dst}=I|Is], Regs0, D, Acc) -> - Bl = [{set,[Dst],[Src],{get_tuple_element,Idx}}], - {_,Regs} = live_opt_block(Bl, Regs0, D, []), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{move,Src,Dst}=I|Is], Regs0, D, Acc) -> - Regs = x_live([Src], x_dead([Dst], Regs0)), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{put_map,F,Op,S,Dst,R,{list,Puts}}=I|Is], Regs0, D, Acc) -> - Bl = [{set,[Dst],[S|Puts],{alloc,R,{put_map,Op,F}}}], - {_,Regs} = live_opt_block(Bl, Regs0, D, []), - live_opt(Is, Regs, D, [I|Acc]); - -%% Transparent instructions - they neither use nor modify x registers. -live_opt([{deallocate,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{kill,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{try_end,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{loop_rec_end,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{wait_timeout,_,nil}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{wait_timeout,_,{Tag,_}}=I|Is], Regs, D, Acc) when Tag =/= x -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{line,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{'catch',_,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{'try',_,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); - -%% The following instructions can occur if the "compilation" has been -%% started from a .S file using the 'from_asm' option. -live_opt([{trim,_,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{'%',_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{recv_set,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{recv_mark,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); - -live_opt([], _, _, Acc) -> Acc. - -live_opt_block([{set,[{x,X}]=Ds,Ss,move}=I|Is], Regs0, D, Acc) -> - Regs = x_live(Ss, x_dead(Ds, Regs0)), - case is_live(X, Regs0) of - true -> - live_opt_block(Is, Regs, D, [I|Acc]); - false -> - %% Useless move, will never be used. - live_opt_block(Is, Regs, D, Acc) - end; -live_opt_block([{set,Ds,Ss,{alloc,Live0,AllocOp}}|Is], Regs0, D, Acc) -> - %% Calculate liveness from the point of view of the GC. - %% There will never be a GC if the instruction fails, so we should - %% ignore the failure branch. - GcRegs1 = x_dead(Ds, Regs0), - GcRegs = x_live(Ss, GcRegs1), - Live = live_regs(GcRegs), - - %% The life-time analysis used by the code generator is sometimes too - %% conservative, so it may be possible to lower the number of live - %% registers based on the exact liveness information. The main benefit is - %% that more optimizations that depend on liveness information (such as the - %% beam_dead pass) may be applied. - true = Live =< Live0, %Assertion. - I = {set,Ds,Ss,{alloc,Live,AllocOp}}, - - %% Calculate liveness from the point of view of the preceding instruction. - %% The liveness is the union of live registers in the GC and the live - %% registers at the failure label. - Regs1 = live_call(Live), - Regs = live_join_alloc(AllocOp, D, Regs1), - live_opt_block(Is, Regs, D, [I|Acc]); -live_opt_block([{set,Ds,Ss,{bif,_,Fail}}=I|Is], Regs0, D, Acc) -> - Regs1 = x_dead(Ds, Regs0), - Regs2 = x_live(Ss, Regs1), - Regs = live_join_label(Fail, D, Regs2), - live_opt_block(Is, Regs, D, [I|Acc]); -live_opt_block([{set,Ds,Ss,_}=I|Is], Regs0, D, Acc) -> - Regs = x_live(Ss, x_dead(Ds, Regs0)), - live_opt_block(Is, Regs, D, [I|Acc]); -live_opt_block([{'%anno',_}|Is], Regs, D, Acc) -> - live_opt_block(Is, Regs, D, Acc); -live_opt_block([], Regs, _, Acc) -> {Acc,Regs}. - -live_join_alloc({Kind,_Name,Fail}, D, Regs) when Kind =:= gc_bif; Kind =:= put_map -> - live_join_label(Fail, D, Regs); -live_join_alloc(_, _, Regs) -> Regs. - -live_join_labels([{f,L}|T], D, Regs0) when L =/= 0 -> - Regs = gb_trees:get(L, D) bor Regs0, - live_join_labels(T, D, Regs); -live_join_labels([_|T], D, Regs) -> - live_join_labels(T, D, Regs); -live_join_labels([], _, Regs) -> Regs. - -live_join_label({f,0}, _, Regs) -> - Regs; -live_join_label({f,L}, D, Regs) -> - gb_trees:get(L, D) bor Regs. - -live_call(Live) -> (1 bsl Live) - 1. - -live_regs(Regs) -> - live_regs_1(0, Regs). - -live_regs_1(N, 0) -> N; -live_regs_1(N, Regs) -> live_regs_1(N+1, Regs bsr 1). - -x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N))); -x_dead([_|Rs], Regs) -> x_dead(Rs, Regs); -x_dead([], Regs) -> Regs. - -x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N)); -x_live([_|Rs], Regs) -> x_live(Rs, Regs); -x_live([], Regs) -> Regs. - -is_live(X, Regs) -> ((Regs bsr X) band 1) =:= 1. - split_even([], Ss, Ds) -> {reverse(Ss),reverse(Ds)}; split_even([S,D|Rs], Ss, Ds) -> split_even(Rs, [S|Ss], [D|Ds]). - -%%% -%%% Add annotations for defined registers. -%%% -%%% This analysis is done by scanning the instructions in -%%% execution order. -%%% - -defs([{apply,_}=I|Is], _Regs, D) -> - [I|defs(Is, 1, D)]; -defs([{bif,_,{f,Fail},_Src,Dst}=I|Is], Regs0, D) -> - Regs = def_regs([Dst], Regs0), - [I|defs(Is, Regs, update_regs(Fail, Regs0, D))]; -defs([{block,Block0}|Is], Regs0, D0) -> - {Block,Regs,D} = defs_list(Block0, Regs0, D0), - [{block,[make_anno({def,Regs0})|Block]}|defs(Is, Regs, D)]; -defs([{bs_init,{f,L},_,Live,_,Dst}=I|Is], Regs0, D) -> - Regs1 = case Live of - none -> Regs0; - _ -> init_def_regs(Live) - end, - Regs = def_regs([Dst], Regs1), - [I|defs(Is, Regs, update_regs(L, Regs, D))]; -defs([{bs_put,{f,L},_,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, update_regs(L, Regs, D))]; -defs([build_stacktrace=I|Is], _Regs, D) -> - [I|defs(Is, 1, D)]; -defs([{call,_,_}=I|Is], _Regs, D) -> - [I|defs(Is, 1, D)]; -defs([{call_ext,_,{extfunc,M,F,A}}=I|Is], _Regs, D) -> - case erl_bifs:is_exit_bif(M, F, A) of - false -> - [I|defs(Is, 1, D)]; - true -> - [I|defs_unreachable(Is, D)] - end; -defs([{call_ext,_,_}=I|Is], _Regs, D) -> - [I|defs(Is, 1, D)]; -defs([{call_fun,_}=I|Is], _Regs, D) -> - [I|defs(Is, 1, D)]; -defs([{'catch',_,{f,L}}=I|Is], Regs, D) -> - RegsAtLabel = init_def_regs(1), - [I|defs(Is, Regs, update_regs(L, RegsAtLabel, D))]; -defs([{catch_end,_}=I|Is], _Regs, D) -> - Regs = init_def_regs(1), - [I|defs(Is, Regs, D)]; -defs([{gc_bif,_,{f,Fail},Live,_Src,Dst}=I|Is], Regs0, D) -> - true = all_defined(Live, Regs0), %Assertion. - Regs = def_regs([Dst], init_def_regs(Live)), - [I|defs(Is, Regs, update_regs(Fail, Regs0, D))]; -defs([{get_map_elements,{f,L},_Src,{list,DstList}}=I|Is], Regs0, D) -> - {_,Ds} = beam_utils:split_even(DstList), - Regs = def_regs(Ds, Regs0), - [I|defs(Is, Regs, update_regs(L, Regs0, D))]; -defs([{get_tuple_element,_,_,Dst}=I|Is], Regs0, D) -> - Regs = def_regs([Dst], Regs0), - [I|defs(Is, Regs, D)]; -defs([{jump,{f,L}}=I|Is], Regs, D) -> - [I|defs_unreachable(Is, update_regs(L, Regs, D))]; -defs([{label,L}=I|Is], Regs0, D) -> - case D of - #{L:=Regs1} -> - Regs = Regs0 band Regs1, - [I|defs(Is, Regs, D)]; - #{} -> - [I|defs(Is, Regs0, D)] - end; -defs([{loop_rec,{f,L},{x,0}}=I|Is], _Regs, D0) -> - RegsAtLabel = init_def_regs(0), - D = update_regs(L, RegsAtLabel, D0), - [I|defs(Is, init_def_regs(1), D)]; -defs([{loop_rec_end,_}=I|Is], _Regs, D) -> - [I|defs_unreachable(Is, D)]; -defs([{make_fun2,_,_,_,_}=I|Is], _Regs, D) -> - [I|defs(Is, 1, D)]; -defs([{move,_,Dst}=I|Is], Regs0, D) -> - Regs = def_regs([Dst], Regs0), - [I|defs(Is, Regs, D)]; -defs([{put_map,{f,Fail},_,_,Dst,_,_}=I|Is], Regs0, D) -> - Regs = def_regs([Dst], Regs0), - [I|defs(Is, Regs, update_regs(Fail, Regs0, D))]; -defs([raw_raise=I|Is], _Regs, D) -> - [I|defs(Is, 1, D)]; -defs([return=I|Is], _Regs, D) -> - [I|defs_unreachable(Is, D)]; -defs([{select,_,_Src,Fail,List}=I|Is], Regs, D0) -> - D = update_list([Fail|List], Regs, D0), - [I|defs_unreachable(Is, D)]; -defs([{test,_,{f,L},_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, update_regs(L, Regs, D))]; -defs([{test,_,{f,L},Live,_,Dst}=I|Is], Regs0, D) -> - true = all_defined(Live, Regs0), %Assertion. - Regs = def_regs([Dst], init_def_regs(Live)), - [I|defs(Is, Regs, update_regs(L, Regs0, D))]; -defs([{'try',_,{f,L}}=I|Is], Regs, D) -> - RegsAtLabel = init_def_regs(3), - [I|defs(Is, Regs, update_regs(L, RegsAtLabel, D))]; -defs([{try_case,_}=I|Is], _Regs, D) -> - [I|defs(Is, init_def_regs(3), D)]; -defs([{wait,_}=I|Is], _Regs, D) -> - [I|defs_unreachable(Is, D)]; -defs([{wait_timeout,_,_}=I|Is], _Regs, D) -> - [I|defs(Is, 0, D)]; - -%% Exceptions. -defs([{badmatch,_}=I|Is], _Regs, D) -> - [I|defs_unreachable(Is, D)]; -defs([{case_end,_}=I|Is], _Regs, D) -> - [I|defs_unreachable(Is, D)]; -defs([if_end=I|Is], _Regs, D) -> - [I|defs_unreachable(Is, D)]; -defs([{try_case_end,_}=I|Is], _Regs, D) -> - [I|defs_unreachable(Is, D)]; - -%% Neutral instructions -defs([{bs_context_to_binary,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([{bs_restore2,_,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([{bs_save2,_,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([{deallocate,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([{kill,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([{line,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([{recv_mark,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([{recv_set,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([timeout=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([{trim,_,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([{try_end,_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([{'%',_}=I|Is], Regs, D) -> - [I|defs(Is, Regs, D)]; -defs([], _, _) -> []. - -defs_unreachable([{label,L}=I|Is], D) -> - case D of - #{L:=Regs} -> - [I|defs(Is, Regs, D)]; - #{} -> - defs_unreachable(Is, D) - end; -defs_unreachable([_|Is], D) -> - defs_unreachable(Is, D); -defs_unreachable([], _D) -> []. - -defs_list(Is, Regs, D) -> - defs_list(Is, Regs, D, []). - -defs_list([{set,Ds,_,{alloc,Live,Info}}=I|Is], Regs0, D0, Acc) -> - true = all_defined(Live, Regs0), %Assertion. - D = case Info of - {gc_bif,_,{f,Fail}} -> - update_regs(Fail, Regs0, D0); - {put_map,_,{f,Fail}} -> - update_regs(Fail, Regs0, D0); - _ -> - D0 - end, - Regs = def_regs(Ds, init_def_regs(Live)), - defs_list(Is, Regs, D, [I|Acc]); -defs_list([{set,Ds,_,Info}=I|Is], Regs0, D0, Acc) -> - D = case Info of - {bif,_,{f,Fail}} -> - update_regs(Fail, Regs0, D0); - {try_catch,'catch',{f,Fail}} -> - update_regs(Fail, init_def_regs(1), D0); - {try_catch,'try',{f,Fail}} -> - update_regs(Fail, init_def_regs(3), D0); - _ -> - D0 - end, - Regs = def_regs(Ds, Regs0), - defs_list(Is, Regs, D, [I|Acc]); -defs_list([], Regs, D, Acc) -> - {reverse(Acc),Regs,D}. - -init_def_regs(Arity) -> - (1 bsl Arity) - 1. - -def_regs([{x,X}|T], Regs) -> - def_regs(T, Regs bor (1 bsl X)); -def_regs([_|T], Regs) -> - def_regs(T, Regs); -def_regs([], Regs) -> Regs. - -update_list([{f,L}|T], Regs, D0) -> - D = update_regs(L, Regs, D0), - update_list(T, Regs, D); -update_list([_|T], Regs, D) -> - update_list(T, Regs, D); -update_list([], _Regs, D) -> D. - -update_regs(L, Regs0, D) -> - case D of - #{L:=Regs1} -> - Regs = Regs0 band Regs1, - D#{L:=Regs}; - #{} -> - D#{L=>Regs0} - end. - -all_defined(Live, Regs) -> - All = (1 bsl Live) - 1, - Regs band All =:= All. - -%%% -%%% Utilities. -%%% - -%% make_anno(Anno) -> WrappedAnno. -%% Wrap an annotation term. - -make_anno(Anno) -> - {'%anno',Anno}. diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index e1c1f7338e..5fdea23a26 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -751,16 +751,12 @@ asm_passes() -> [{pass,beam_a}, {iff,da,{listing,"a"}}, {unless,no_postopt, - [{unless,no_reorder,{pass,beam_reorder}}, - {iff,dre,{listing,"reorder"}}, - {pass,beam_block}, + [{pass,beam_block}, {iff,dblk,{listing,"block"}}, {unless,no_except,{pass,beam_except}}, {iff,dexcept,{listing,"except"}}, {unless,no_bs_opt,{pass,beam_bs}}, {iff,dbs,{listing,"bs"}}, - {unless,no_topt,{pass,beam_type}}, - {iff,dtype,{listing,"type"}}, {pass,beam_split}, {iff,dsplit,{listing,"split"}}, {unless,no_dead,{pass,beam_dead}}, @@ -773,12 +769,6 @@ asm_passes() -> {iff,dclean,{listing,"clean"}}, {unless,no_bsm_opt,{pass,beam_bsm}}, {iff,dbsm,{listing,"bsm"}}, - {unless,no_recv_opt,{pass,beam_receive}}, - {iff,drecv,{listing,"recv"}}, - {unless,no_record_opt,{pass,beam_record}}, - {iff,drecord,{listing,"record"}}, - {unless,no_blk2,?pass(block2)}, - {iff,dblk2,{listing,"block2"}}, {unless,no_stack_trimming,{pass,beam_trim}}, {iff,dtrim,{listing,"trim"}}, {pass,beam_flatten}]}, @@ -1354,10 +1344,6 @@ v3_kernel(Code0, #compile{options=Opts,warnings=Ws0}=St) -> {ok,Code,St} end. -block2(Code0, #compile{options=Opts}=St) -> - {ok,Code} = beam_block:module(Code0, [no_blockify|Opts]), - {ok,Code,St}. - test_old_inliner(#compile{options=Opts}) -> %% The point of this test is to avoid loading the old inliner %% if we know that it will not be used. @@ -1974,12 +1960,8 @@ pre_load() -> beam_jump, beam_opcodes, beam_peep, - beam_receive, - beam_record, - beam_reorder, beam_split, beam_trim, - beam_type, beam_utils, beam_validator, beam_z, diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src index cf32fd251c..9fa5a1c6ea 100644 --- a/lib/compiler/src/compiler.app.src +++ b/lib/compiler/src/compiler.app.src @@ -36,12 +36,8 @@ beam_listing, beam_opcodes, beam_peep, - beam_receive, - beam_reorder, - beam_record, beam_split, beam_trim, - beam_type, beam_utils, beam_validator, beam_z, diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl index 1ecae06128..8056982d8e 100644 --- a/lib/compiler/test/compile_SUITE.erl +++ b/lib/compiler/test/compile_SUITE.erl @@ -389,7 +389,6 @@ do_file_listings(DataDir, PrivDir, [File|Files]) -> do_listing(Simple, TargetDir, dblk, ".block"), do_listing(Simple, TargetDir, dexcept, ".except"), do_listing(Simple, TargetDir, dbs, ".bs"), - do_listing(Simple, TargetDir, dtype, ".type"), do_listing(Simple, TargetDir, ddead, ".dead"), do_listing(Simple, TargetDir, djmp, ".jump"), do_listing(Simple, TargetDir, dclean, ".clean"), diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl index a1d931b994..497cbad636 100644 --- a/lib/compiler/test/misc_SUITE.erl +++ b/lib/compiler/test/misc_SUITE.erl @@ -189,14 +189,6 @@ silly_coverage(Config) when is_list(Config) -> {label,2}|non_proper_list]}],99}, expect_error(fun() -> beam_a:module(BeamAInput, []) end), - %% beam_reorder - BlockInput = {?MODULE,[{foo,0}],[], - [{function,foo,0,2, - [{label,1}, - {func_info,{atom,?MODULE},{atom,foo},0}, - {label,2}|non_proper_list]}],99}, - expect_error(fun() -> beam_reorder:module(BlockInput, []) end), - %% beam_block BlockInput = {?MODULE,[{foo,0}],[], [{function,foo,0,2, @@ -209,15 +201,6 @@ silly_coverage(Config) when is_list(Config) -> BsInput = BlockInput, expect_error(fun() -> beam_bs:module(BsInput, []) end), - %% beam_type - TypeInput = {?MODULE,[{foo,0}],[], - [{function,foo,0,2, - [{label,1}, - {line,loc}, - {func_info,{atom,?MODULE},{atom,foo},0}, - {label,2}|non_proper_list]}],99}, - expect_error(fun() -> beam_type:module(TypeInput, []) end), - %% beam_except ExceptInput = {?MODULE,[{foo,0}],[], [{function,foo,0,2, @@ -268,33 +251,6 @@ silly_coverage(Config) when is_list(Config) -> {block,[a|b]}]}],0}, expect_error(fun() -> beam_bsm:module(BsmInput, []) end), - %% beam_receive. - ReceiveInput = {?MODULE,[{foo,0}],[], - [{function,foo,0,2, - [{label,1}, - {func_info,{atom,?MODULE},{atom,foo},0}, - {label,2}, - {call_ext,0,{extfunc,erlang,make_ref,0}}, - {block,[a|b]}]}],0}, - expect_error(fun() -> beam_receive:module(ReceiveInput, []) end), - - %% beam_record. - RecordInput = {?MODULE,[{foo,0}],[], - [{function,foo,1,2, - [{label,1}, - {func_info,{atom,?MODULE},{atom,foo},1}, - {label,2}, - {test,is_tuple,{f,1},[{x,0}]}, - {test,test_arity,{f,1},[{x,0},3]}, - {block,[{set,[{x,1}],[{x,0}],{get_tuple_element,0}}]}, - {test,is_eq_exact,{f,1},[{x,1},{atom,bar}]}, - {block,[{set,[{x,2}],[{x,0}],{get_tuple_element,1}}|a]}, - {test,is_eq_exact,{f,1},[{x,2},{integer,1}]}, - {block,[{set,[{x,0}],[{atom,ok}],move}]}, - return]}],0}, - - expect_error(fun() -> beam_record:module(RecordInput, []) end), - BeamZInput = {?MODULE,[{foo,0}],[], [{function,foo,0,2, [{label,1}, -- cgit v1.2.3 From 62fac82a67f1a8b82c144b3000b6d57a1f815f82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 9 May 2018 06:40:30 +0200 Subject: beam_bs: Remove optimizations that are easier done on SSA format The removal of redundant bs_restore2 instructions is done easier on the SSA format. Keep the rest of the optimizations, because they are easier to do on the BEAM instructions. --- lib/compiler/src/beam_bs.erl | 141 +++++++--------------------------------- lib/compiler/src/beam_clean.erl | 19 ------ 2 files changed, 22 insertions(+), 138 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_bs.erl b/lib/compiler/src/beam_bs.erl index 5f1b9ed488..15d8d687fc 100644 --- a/lib/compiler/src/beam_bs.erl +++ b/lib/compiler/src/beam_bs.erl @@ -17,26 +17,24 @@ %% %% %CopyrightEnd% %% -%% Purpose : Partitions assembly instructions into basic blocks and -%% optimizes them. +%% Purpose: Peephole optimization of binary syntax instructions. -module(beam_bs). -export([module/2]). --import(lists, [mapfoldl/3,reverse/1]). +-import(lists, [reverse/1]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. -module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) -> - {Fs,Lc} = mapfoldl(fun function/2, Lc0, Fs0), +module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> + Fs = [function(F) || F <- Fs0], {ok,{Mod,Exp,Attr,Fs,Lc}}. -function({function,Name,Arity,CLabel,Is0}, Lc0) -> +function({function,Name,Arity,CLabel,Is0}) -> try - Is1 = bs_put_opt(Is0), - {Is,Lc} = bsm_opt(Is1, Lc0), - {{function,Name,Arity,CLabel,Is},Lc} + Is = bs_opt(Is0), + {function,Name,Arity,CLabel,Is} catch Class:Error:Stack -> io:fwrite("Function: ~w/~w\n", [Name,Arity]), @@ -44,16 +42,25 @@ function({function,Name,Arity,CLabel,Is0}, Lc0) -> end. %%% -%%% Evaluation of constant bit fields. +%%% Evaluate construction of constant bit fields. +%%% Combine bs_skip_bits2 and bs_test_tail2 instructions. %%% -bs_put_opt([{bs_put,_,_,_}=I|Is0]) -> +bs_opt([{bs_put,_,_,_}=I|Is0]) -> {BsPuts0,Is} = collect_bs_puts(Is0, [I]), BsPuts = opt_bs_puts(BsPuts0), - BsPuts ++ bs_put_opt(Is); -bs_put_opt([I|Is]) -> - [I|bs_put_opt(Is)]; -bs_put_opt([]) -> []. + BsPuts ++ bs_opt(Is); +bs_opt([{test,bs_skip_bits2,F,[Ctx,{integer,I},Unit,_Flags]}, + {test,bs_test_tail2,F,[Ctx,Bits]}|Is]) -> + [{test,bs_test_tail2,F,[Ctx,Bits+I*Unit]}|bs_opt(Is)]; +bs_opt([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,Flags]}, + {test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,_]}|Is]) -> + I = {test,bs_skip_bits2,F, + [Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]}, + bs_opt([I|Is]); +bs_opt([I|Is]) -> + [I|bs_opt(Is)]; +bs_opt([]) -> []. collect_bs_puts([{bs_put,_,_,_}=I|Is], Acc) -> collect_bs_puts(Is, [I|Acc]); @@ -174,107 +181,3 @@ bs_split_int_1(N, ByteSz, Sz, Fail, Acc) when Sz > 0 -> [{integer,ByteSz},{integer,N band Mask}]}, bs_split_int_1(N bsr ByteSz, 8, Sz-ByteSz, Fail, [I|Acc]); bs_split_int_1(_, _, _, _, Acc) -> Acc. - -%%% -%%% Optimization of bit syntax matching: get rid -%%% of redundant bs_restore2/2 instructions across select_val -%%% instructions, as well as a few other simple peep-hole -%%% optimizations. -%%% - -bsm_opt(Is0, Lc0) -> - {Is1,D0,Lc} = bsm_scan(Is0, [], Lc0, []), - Is2 = case D0 of - [] -> - %% No bit syntax matching in this function. - Is1; - [_|_] -> - %% Optimize the bit syntax matching. - D = gb_trees:from_orddict(orddict:from_list(D0)), - bsm_reroute(Is1, D, none, []) - end, - Is = beam_clean:bs_clean_saves(Is2), - {bsm_opt_2(Is, []),Lc}. - -bsm_scan([{label,L}=Lbl,{bs_restore2,_,Save}=R|Is], D0, Lc, Acc0) -> - D = [{{L,Save},Lc}|D0], - Acc = [{label,Lc},R,Lbl|Acc0], - bsm_scan(Is, D, Lc+1, Acc); -bsm_scan([I|Is], D, Lc, Acc) -> - bsm_scan(Is, D, Lc, [I|Acc]); -bsm_scan([], D, Lc, Acc) -> - {reverse(Acc),D,Lc}. - -bsm_reroute([{bs_save2,Reg,Save}=I|Is], D, _, Acc) -> - bsm_reroute(Is, D, {Reg,Save}, [I|Acc]); -bsm_reroute([{bs_restore2,Reg,Save}=I|Is], D, _, Acc) -> - bsm_reroute(Is, D, {Reg,Save}, [I|Acc]); -bsm_reroute([{label,_}=I|Is], D, S, Acc) -> - bsm_reroute(Is, D, S, [I|Acc]); -bsm_reroute([{select,select_val,Reg,F0,Lbls0}|Is], D, {_,Save}=S, Acc0) -> - [F|Lbls] = bsm_subst_labels([F0|Lbls0], Save, D), - Acc = [{select,select_val,Reg,F,Lbls}|Acc0], - bsm_reroute(Is, D, S, Acc); -bsm_reroute([{test,TestOp,F0,TestArgs}=I|Is], D, {_,Save}=S, Acc0) -> - F = bsm_subst_label(F0, Save, D), - Acc = [{test,TestOp,F,TestArgs}|Acc0], - case bsm_not_bs_test(I) of - true -> - %% The test instruction will not update the bit offset for - %% the binary being matched. Therefore the save position - %% can be kept. - bsm_reroute(Is, D, S, Acc); - false -> - %% The test instruction might update the bit offset. Kill - %% our remembered Save position. - bsm_reroute(Is, D, none, Acc) - end; -bsm_reroute([{test,TestOp,F0,Live,TestArgs,Dst}|Is], D, {_,Save}, Acc0) -> - F = bsm_subst_label(F0, Save, D), - Acc = [{test,TestOp,F,Live,TestArgs,Dst}|Acc0], - %% The test instruction will update the bit offset. Kill our - %% remembered Save position. - bsm_reroute(Is, D, none, Acc); -bsm_reroute([{block,[{set,[],[],{alloc,_,_}}]}=Bl, - {bs_context_to_binary,_}=I|Is], D, S, Acc) -> - %% To help further bit syntax optimizations. - bsm_reroute([I,Bl|Is], D, S, Acc); -bsm_reroute([I|Is], D, _, Acc) -> - bsm_reroute(Is, D, none, [I|Acc]); -bsm_reroute([], _, _, Acc) -> reverse(Acc). - -bsm_opt_2([{test,bs_test_tail2,F,[Ctx,Bits]}|Is], - [{test,bs_skip_bits2,F,[Ctx,{integer,I},Unit,_Flags]}|Acc]) -> - bsm_opt_2(Is, [{test,bs_test_tail2,F,[Ctx,Bits+I*Unit]}|Acc]); -bsm_opt_2([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,_]}|Is], - [{test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,Flags]}|Acc]) -> - bsm_opt_2(Is, [{test,bs_skip_bits2,F, - [Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]); -bsm_opt_2([I|Is], Acc) -> - bsm_opt_2(Is, [I|Acc]); -bsm_opt_2([], Acc) -> reverse(Acc). - -%% bsm_not_bs_test({test,Name,_,Operands}) -> true|false. -%% Test whether is the test is a "safe", i.e. does not move the -%% bit offset for a binary. -%% -%% 'true' means that the test is safe, 'false' that we don't know or -%% that the test moves the offset (e.g. bs_get_integer2). - -bsm_not_bs_test({test,bs_test_tail2,_,[_,_]}) -> true; -bsm_not_bs_test(Test) -> beam_utils:is_pure_test(Test). - -bsm_subst_labels(Fs, Save, D) -> - bsm_subst_labels_1(Fs, Save, D, []). - -bsm_subst_labels_1([F|Fs], Save, D, Acc) -> - bsm_subst_labels_1(Fs, Save, D, [bsm_subst_label(F, Save, D)|Acc]); -bsm_subst_labels_1([], _, _, Acc) -> - reverse(Acc). - -bsm_subst_label({f,Lbl0}=F, Save, D) -> - case gb_trees:lookup({Lbl0,Save}, D) of - {value,Lbl} -> {f,Lbl}; - none -> F - end; -bsm_subst_label(Other, _, _) -> Other. diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl index 207f1c4deb..5a9023c259 100644 --- a/lib/compiler/src/beam_clean.erl +++ b/lib/compiler/src/beam_clean.erl @@ -22,7 +22,6 @@ -module(beam_clean). -export([module/2]). --export([bs_clean_saves/1]). -export([clean_labels/1]). -import(lists, [foldl/3,reverse/1]). @@ -41,15 +40,6 @@ module({Mod,Exp,Attr,Fs0,_}, Opts) -> Fs = maybe_remove_lines(Fs3, Opts), {ok,{Mod,Exp,Attr,Fs,Lc}}. -%% Remove all bs_save2/2 instructions not referenced by a bs_restore2/2. - --spec bs_clean_saves([beam_utils:instruction()]) -> - [beam_utils:instruction()]. - -bs_clean_saves(Is) -> - Needed = bs_restores(Is, []), - bs_clean_saves_1(Is, gb_sets:from_list(Needed), []). - %% Determine the rootset, i.e. exported functions and %% the on_load function (if any). @@ -283,15 +273,6 @@ bs_replace([I|Is], Dict, Acc) -> bs_replace(Is, Dict, [I|Acc]); bs_replace([], _, Acc) -> reverse(Acc). -bs_clean_saves_1([{bs_save2,_,{_,_}=SavePoint}=I|Is], Needed, Acc) -> - case gb_sets:is_member(SavePoint, Needed) of - false -> bs_clean_saves_1(Is, Needed, Acc); - true -> bs_clean_saves_1(Is, Needed, [I|Acc]) - end; -bs_clean_saves_1([I|Is], Needed, Acc) -> - bs_clean_saves_1(Is, Needed, [I|Acc]); -bs_clean_saves_1([], _, Acc) -> reverse(Acc). - %%% %%% Remove line instructions if requested. %%% -- cgit v1.2.3 From f107c76b7996c731670e52788801f534fa5326aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 22 May 2018 05:53:30 +0200 Subject: beam_dead: Remove shortcut of binary matching instruction This optimization can be better done in the SSA format before code generation. --- lib/compiler/src/beam_dead.erl | 94 +++++++----------------------------------- 1 file changed, 15 insertions(+), 79 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl index eb5652b69a..1004f6048b 100644 --- a/lib/compiler/src/beam_dead.erl +++ b/lib/compiler/src/beam_dead.erl @@ -233,8 +233,7 @@ backward([{label,Lbl}=L|Is], D, Acc) -> backward(Is, beam_utils:index_label(Lbl, Acc, D), [L|Acc]); backward([{select,select_val,Reg,{f,Fail0},List0}|Is], D, Acc) -> List1 = shortcut_select_list(List0, Reg, D, []), - Fail1 = shortcut_label(Fail0, D), - Fail = shortcut_bs_test(Fail1, Is, D), + Fail = shortcut_label(Fail0, D), List = prune_redundant(List1, Fail), case List of [] -> @@ -304,9 +303,8 @@ backward([{test,bs_start_match2,F,Live,[Src,_]=Args,Ctxt}|Is], D, Acc0) -> backward(Is, D, [I|Acc0]) end; backward([{test,Op,{f,To0},Ops0}|Is], D, Acc) -> - To1 = shortcut_bs_test(To0, Is, D), - To2 = shortcut_label(To1, D), - To3 = shortcut_rel_op(To2, Op, Ops0, D), + To1 = shortcut_label(To0, D), + To2 = shortcut_rel_op(To1, Op, Ops0, D), %% Try to shortcut a repeated test: %% @@ -314,14 +312,14 @@ backward([{test,Op,{f,To0},Ops0}|Is], D, Acc) -> %% . . . ==> ... %% Fail1: test Op {f,Fail2} Operands Fail1: test Op {f,Fail2} Operands %% - To = case beam_utils:code_at(To3, D) of - [{test,Op,{f,To4},Ops}|_] -> + To = case beam_utils:code_at(To2, D) of + [{test,Op,{f,To3},Ops}|_] -> case equal_ops(Ops0, Ops) of - true -> To4; - false -> To3 + true -> To3; + false -> To2 end; _Code -> - To3 + To2 end, I = case Op of is_eq_exact -> combine_eqs(To, Ops0, D, Acc); @@ -350,22 +348,22 @@ backward([{test,Op,{f,To0},Ops0}|Is], D, Acc) -> backward([I|Is], D, Acc) end; backward([{test,Op,{f,To0},Live,Ops0,Dst}|Is], D, Acc) -> - To1 = shortcut_bs_test(To0, Is, D), - To2 = shortcut_label(To1, D), + To1 = shortcut_label(To0, D), + %% Try to shortcut a repeated test: %% %% test Op {f,Fail1} _ Ops _ test Op {f,Fail2} _ Ops _ %% . . . ==> ... %% Fail1: test Op {f,Fail2} _ Ops _ Fail1: test Op {f,Fail2} _ Ops _ %% - To = case beam_utils:code_at(To2, D) of - [{test,Op,{f,To3},_,Ops,_}|_] -> + To = case beam_utils:code_at(To1, D) of + [{test,Op,{f,To2},_,Ops,_}|_] -> case equal_ops(Ops0, Ops) of - true -> To3; - false -> To2 + true -> To2; + false -> To1 end; _Code -> - To2 + To1 end, I = {test,Op,{f,To},Live,Ops0,Dst}, backward(Is, D, [I|Acc]); @@ -538,68 +536,6 @@ test_bs_literal_1(Ctxt, Is, D, Literal) -> false -> not_killed end. -%% shortcut_bs_test(TargetLabel, ReversedInstructions, D) -> TargetLabel' -%% Try to shortcut the failure label for bit syntax matching. - -shortcut_bs_test(To, Is, D) -> - shortcut_bs_test_1(beam_utils:code_at(To, D), Is, To, D). - -shortcut_bs_test_1([{bs_restore2,Reg,SavePoint}, - {label,_}, - {test,bs_test_tail2,{f,To},[_,TailBits]}|_], - PrevIs, To0, D) -> - case count_bits_matched(PrevIs, {Reg,SavePoint}, 0) of - Bits when Bits > TailBits -> - %% This instruction will fail. We know because a restore has been - %% done from the previous point SavePoint in the binary, and we - %% also know that the binary contains at least Bits bits from - %% SavePoint. - %% - %% Since we will skip a bs_restore2 if we shortcut to label To, - %% we must now make sure that code at To does not depend on - %% the position in the context in any way. - case shortcut_bs_pos_used(To, Reg, D) of - false -> To; - true -> To0 - end; - _Bits -> - To0 - end; -shortcut_bs_test_1([_|_], _, To, _) -> To. - -%% counts_bits_matched(ReversedInstructions, SavePoint, Bits) -> Bits' -%% Given a reversed instruction stream, determine the minimum number -%% of bits that will be matched by bit syntax instructions up to the -%% given save point. - -count_bits_matched([{test,bs_get_utf8,{f,_},_,_,_}|Is], SavePoint, Bits) -> - count_bits_matched(Is, SavePoint, Bits+8); -count_bits_matched([{test,bs_get_utf16,{f,_},_,_,_}|Is], SavePoint, Bits) -> - count_bits_matched(Is, SavePoint, Bits+16); -count_bits_matched([{test,bs_get_utf32,{f,_},_,_,_}|Is], SavePoint, Bits) -> - count_bits_matched(Is, SavePoint, Bits+32); -count_bits_matched([{test,_,_,_,[_,Sz,U,{field_flags,_}],_}|Is], SavePoint, Bits) -> - case Sz of - {integer,N} -> count_bits_matched(Is, SavePoint, Bits+N*U); - _ -> count_bits_matched(Is, SavePoint, Bits) - end; -count_bits_matched([{test,bs_match_string,_,[_,Bs]}|Is], SavePoint, Bits) -> - count_bits_matched(Is, SavePoint, Bits+bit_size(Bs)); -count_bits_matched([{test,_,_,_}|Is], SavePoint, Bits) -> - count_bits_matched(Is, SavePoint, Bits); -count_bits_matched([{bs_save2,Reg,SavePoint}|_], {Reg,SavePoint}, Bits) -> - %% The save point we are looking for - we are done. - Bits; -count_bits_matched([_|_], _, Bits) -> Bits. - -shortcut_bs_pos_used(To, Reg, D) -> - shortcut_bs_pos_used_1(beam_utils:code_at(To, D), Reg, D). - -shortcut_bs_pos_used_1([{bs_context_to_binary,Reg}|_], Reg, _) -> - false; -shortcut_bs_pos_used_1(Is, Reg, D) -> - not beam_utils:is_killed(Reg, Is, D). - %% shortcut_bs_start_match(TargetLabel, Reg) -> TargetLabel %% A failing bs_start_match2 instruction means that the source (Reg) %% cannot be a binary. That means that it is safe to skip -- cgit v1.2.3 From 6bee2ac7d11668888d93ec4f93730bcae3e5fa79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 1 Feb 2018 08:33:10 +0100 Subject: Introduce a new SSA-based intermediate format v3_codegen is replaced by three new passes: * beam_kernel_to_ssa which translates the Kernel Erlang format to a new SSA-based intermediate format. * beam_ssa_pre_codegen which prepares the SSA-based format for code generation, including register allocation. Registers are allocated using the linear scan algorithm. * beam_ssa_codegen which generates BEAM assembly code from the SSA-based format. It easier and more effective to optimize the SSA-based format before X and Y registers have been assigned. The current optimization passes constantly have to make sure no "holes" in the X register assignments are created (that is, that no X register becomes undefined that an allocation instruction depends on). This commit also introduces the following optimizations: * Replacing of tuple matching of records with the is_tagged_tuple instruction. (Replacing beam_record.) * Sinking of get_tuple_element instructions to just before the first use of the extracted values. As well as potentially avoiding extracting tuple elements when they are not actually used on all executions paths, this optimization could also reduce the number values that will need to be stored in Y registers. (Similar to beam_reorder, but more effective.) * Live optimizations, removing the definition of a variable that is not subsequently used (provided that the operation has no side effects), as well strength reduction of binary matching by replacing the extraction of value from a binary with a skip instruction. (Used to be done by beam_block, beam_utils, and v3_codegen.) * Removal of redundant bs_restore2 instructions. (Formerly done by beam_bs.) * Type-based optimizations across branches. More effective than the old beam_type pass that only did type-based optimizations in basic blocks. * Optimization of floating point instructions. (Formerly done by beam_type.) * Optimization of receive statements to introduce recv_mark and recv_set instructions. More effective with far fewer restrictions on what instructions are allowed between creating the reference and entering the receive statement. * Common subexpression elimination. (Formerly done by beam_block.) --- lib/compiler/src/Makefile | 22 +- lib/compiler/src/beam_a.erl | 17 +- lib/compiler/src/beam_block.erl | 16 +- lib/compiler/src/beam_clean.erl | 143 +- lib/compiler/src/beam_dead.erl | 20 +- lib/compiler/src/beam_flatten.erl | 4 +- lib/compiler/src/beam_jump.erl | 6 + lib/compiler/src/beam_kernel_to_ssa.erl | 1330 +++++++++++ lib/compiler/src/beam_listing.erl | 7 + lib/compiler/src/beam_peep.erl | 69 +- lib/compiler/src/beam_split.erl | 4 - lib/compiler/src/beam_ssa.erl | 616 +++++ lib/compiler/src/beam_ssa.hrl | 66 + lib/compiler/src/beam_ssa_codegen.erl | 1827 +++++++++++++++ lib/compiler/src/beam_ssa_lint.erl | 349 +++ lib/compiler/src/beam_ssa_opt.erl | 1307 +++++++++++ lib/compiler/src/beam_ssa_pp.erl | 238 ++ lib/compiler/src/beam_ssa_pre_codegen.erl | 2437 ++++++++++++++++++++ lib/compiler/src/beam_ssa_recv.erl | 281 +++ lib/compiler/src/beam_ssa_type.erl | 1106 +++++++++ lib/compiler/src/beam_utils.erl | 3 +- lib/compiler/src/compile.erl | 116 +- lib/compiler/src/compiler.app.src | 10 +- lib/compiler/src/v3_kernel.erl | 11 +- lib/compiler/src/v3_kernel.hrl | 2 +- lib/compiler/test/Makefile | 8 +- lib/compiler/test/beam_ssa_SUITE.erl | 290 +++ lib/compiler/test/beam_type_SUITE.erl | 54 +- lib/compiler/test/bs_match_SUITE.erl | 44 + lib/compiler/test/compilation_SUITE.erl | 6 +- lib/compiler/test/compile_SUITE.erl | 27 +- lib/compiler/test/core_fold_SUITE.erl | 2 +- lib/compiler/test/guard_SUITE.erl | 15 + lib/compiler/test/inline_SUITE.erl | 12 +- lib/compiler/test/misc_SUITE.erl | 53 +- .../test/receive_SUITE_data/ref_opt/no_5.erl | 38 + .../test/receive_SUITE_data/ref_opt/yes_14.S | 71 - .../test/receive_SUITE_data/ref_opt/yes_14.erl | 27 + lib/compiler/test/test_lib.erl | 8 +- 39 files changed, 10342 insertions(+), 320 deletions(-) create mode 100644 lib/compiler/src/beam_kernel_to_ssa.erl create mode 100644 lib/compiler/src/beam_ssa.erl create mode 100644 lib/compiler/src/beam_ssa.hrl create mode 100644 lib/compiler/src/beam_ssa_codegen.erl create mode 100644 lib/compiler/src/beam_ssa_lint.erl create mode 100644 lib/compiler/src/beam_ssa_opt.erl create mode 100644 lib/compiler/src/beam_ssa_pp.erl create mode 100644 lib/compiler/src/beam_ssa_pre_codegen.erl create mode 100644 lib/compiler/src/beam_ssa_recv.erl create mode 100644 lib/compiler/src/beam_ssa_type.erl create mode 100644 lib/compiler/test/beam_ssa_SUITE.erl create mode 100644 lib/compiler/test/receive_SUITE_data/ref_opt/no_5.erl delete mode 100644 lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.S create mode 100644 lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.erl (limited to 'lib/compiler') diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile index 08042aeba4..f036a2d8dc 100644 --- a/lib/compiler/src/Makefile +++ b/lib/compiler/src/Makefile @@ -62,6 +62,15 @@ MODULES = \ beam_opcodes \ beam_peep \ beam_split \ + beam_ssa \ + beam_ssa_codegen \ + beam_ssa_lint \ + beam_ssa_opt \ + beam_ssa_pp \ + beam_ssa_pre_codegen \ + beam_ssa_recv \ + beam_ssa_type \ + beam_kernel_to_ssa \ beam_trim \ beam_utils \ beam_validator \ @@ -86,7 +95,6 @@ MODULES = \ sys_core_fold_lists \ sys_core_inline \ sys_pre_attributes \ - v3_codegen \ v3_core \ v3_kernel \ v3_kernel_pp @@ -95,6 +103,7 @@ BEAM_H = $(wildcard ../priv/beam_h/*.h) HRL_FILES= \ beam_disasm.hrl \ + beam_ssa.hrl \ core_parse.hrl \ v3_kernel.hrl @@ -181,7 +190,16 @@ release_docs_spec: # ---------------------------------------------------- $(EBIN)/beam_disasm.beam: $(EGEN)/beam_opcodes.hrl beam_disasm.hrl -$(EBIN)/beam_listing.beam: core_parse.hrl v3_kernel.hrl +$(EBIN)/beam_listing.beam: core_parse.hrl v3_kernel.hrl beam_ssa.hrl +$(EBIN)/beam_kernel_to_ssa.beam: v3_kernel.hrl beam_ssa.hrl +$(EBIN)/beam_ssa.beam: beam_ssa.hrl +$(EBIN)/beam_ssa_codegen.beam: beam_ssa.hrl +$(EBIN)/beam_ssa_lint.beam: beam_ssa.hrl +$(EBIN)/beam_ssa_opt.beam: beam_ssa.hrl +$(EBIN)/beam_ssa_pp.beam: beam_ssa.hrl +$(EBIN)/beam_ssa_pre_codegen.beam: beam_ssa.hrl +$(EBIN)/beam_ssa_recv.beam: beam_ssa.hrl +$(EBIN)/beam_ssa_type.beam: beam_ssa.hrl $(EBIN)/cerl.beam: core_parse.hrl $(EBIN)/compile.beam: core_parse.hrl ../../stdlib/include/erl_compile.hrl $(EBIN)/core_lib.beam: core_parse.hrl diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl index a26f245446..266e8f46c8 100644 --- a/lib/compiler/src/beam_a.erl +++ b/lib/compiler/src/beam_a.erl @@ -39,8 +39,13 @@ function({function,Name,Arity,CLabel,Is0}) -> %% Remove unusued labels for cleanliness and to help %% optimization passes and HiPE. - Is = beam_jump:remove_unused_labels(Is1), - {function,Name,Arity,CLabel,Is} + Is2 = beam_jump:remove_unused_labels(Is1), + + %% Some optimization passes can't handle consecutive labels. + %% Coalesce multiple consecutive labels. + Is = coalesce_consecutive_labels(Is2, [], []), + + {function,Name,Arity,CLabel,Is} catch Class:Error:Stack -> io:fwrite("Function: ~w/~w\n", [Name,Arity]), @@ -122,3 +127,11 @@ rename_instr({select_tuple_arity=I,Reg,Fail,{list,List}}) -> rename_instr(send) -> {call_ext,2,send}; rename_instr(I) -> I. + +coalesce_consecutive_labels([{label,L}=Lbl,{label,Alias}|Is], Replace, Acc) -> + coalesce_consecutive_labels([Lbl|Is], [{Alias,L}|Replace], Acc); +coalesce_consecutive_labels([I|Is], Replace, Acc) -> + coalesce_consecutive_labels(Is, Replace, [I|Acc]); +coalesce_consecutive_labels([], Replace, Acc) -> + D = maps:from_list(Replace), + beam_utils:replace_labels(Acc, [], D, fun(L) -> L end). diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 3e28ff2c01..c928fc7187 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -17,13 +17,12 @@ %% %% %CopyrightEnd% %% -%% Purpose : Partitions assembly instructions into basic blocks and -%% optimizes them. +%% Purpose: Partition BEAM instructions into basic blocks. -module(beam_block). -export([module/2]). --import(lists, [reverse/1]). +-import(lists, [reverse/1,splitwith/2]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. @@ -34,11 +33,8 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opts) -> function({function,Name,Arity,CLabel,Is0}) -> try - %% Collect basic blocks and optimize them. Is1 = blockify(Is0), Is = embed_lines(Is1), - - %% Done. {function,Name,Arity,CLabel,Is} catch Class:Error:Stack -> @@ -69,12 +65,10 @@ collect_block(Is) -> collect_block(Is, []). collect_block([{allocate,N,R}|Is0], Acc) -> - {Inits,Is} = lists:splitwith(fun ({init,{y,_}}) -> true; - (_) -> false - end, Is0), + {Inits,Is} = splitwith(fun ({init,{y,_}}) -> true; + (_) -> false + end, Is0), collect_block(Is, [{set,[],[],{alloc,R,{nozero,N,0,Inits}}}|Acc]); -collect_block([{allocate_zero,Ns,R},{test_heap,Nh,R}|Is], Acc) -> - collect_block(Is, [{set,[],[],{alloc,R,{zero,Ns,Nh,[]}}}|Acc]); collect_block([I|Is]=Is0, Acc) -> case collect(I) of error -> {reverse(Acc),Is0}; diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl index 5a9023c259..f5f0ac2218 100644 --- a/lib/compiler/src/beam_clean.erl +++ b/lib/compiler/src/beam_clean.erl @@ -23,7 +23,7 @@ -export([module/2]). -export([clean_labels/1]). --import(lists, [foldl/3,reverse/1]). +-import(lists, [foldl/3]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. @@ -36,8 +36,7 @@ module({Mod,Exp,Attr,Fs0,_}, Opts) -> Used = find_all_used(WorkList, All, sets:from_list(WorkList)), Fs1 = remove_unused(Order, Used, All), {Fs2,Lc} = clean_labels(Fs1), - Fs3 = bs_fix(Fs2), - Fs = maybe_remove_lines(Fs3, Opts), + Fs = maybe_remove_lines(Fs2, Opts), {ok,{Mod,Exp,Attr,Fs,Lc}}. %% Determine the rootset, i.e. exported functions and @@ -87,12 +86,8 @@ add_to_work_list(F, {Fs,Used}=Sets) -> %%% %%% Coalesce adjacent labels. Renumber all labels to eliminate gaps. -%%% This cleanup will slightly reduce file size and slightly speed up loading. -%%% -%%% We also expand is_record/3 to a sequence of instructions. It is done -%%% here merely because this module will always be called even if optimization -%%% is turned off. We don't want to do the expansion in beam_asm because we -%%% want to see the expanded code in a .S file. +%%% This cleanup will slightly reduce file size and slightly speed up +%%% loading. %%% -type label() :: beam_asm:label(). @@ -117,45 +112,6 @@ function_renumber([{function,Name,Arity,_Entry,Asm0}|Fs], St0, Acc) -> function_renumber(Fs, St, [{function,Name,Arity,St#st.entry,Asm}|Acc]); function_renumber([], St, Acc) -> {Acc,St}. -renumber_labels([{bif,is_record,{f,_}, - [Term,{atom,Tag}=TagAtom,{integer,Arity}],Dst}|Is0], Acc, St) -> - ContLabel = 900000000+2*St#st.lc, - FailLabel = ContLabel+1, - Fail = {f,FailLabel}, - Tmp = Dst, - Is = case is_record_tuple(Term, Tag, Arity) of - yes -> - [{move,{atom,true},Dst}|Is0]; - no -> - [{move,{atom,false},Dst}|Is0]; - maybe -> - [{test,is_tuple,Fail,[Term]}, - {test,test_arity,Fail,[Term,Arity]}, - {get_tuple_element,Term,0,Tmp}, - {test,is_eq_exact,Fail,[Tmp,TagAtom]}, - {move,{atom,true},Dst}, - {jump,{f,ContLabel}}, - {label,FailLabel}, - {move,{atom,false},Dst}, - {jump,{f,ContLabel}}, %Improves optimization by beam_dead. - {label,ContLabel}|Is0] - end, - renumber_labels(Is, Acc, St); -renumber_labels([{test,is_record,{f,_}=Fail, - [Term,{atom,Tag}=TagAtom,{integer,Arity}]}|Is0], Acc, St) -> - Tmp = {x,1022}, - Is = case is_record_tuple(Term, Tag, Arity) of - yes -> - Is0; - no -> - [{jump,Fail}|Is0]; - maybe -> - [{test,is_tuple,Fail,[Term]}, - {test,test_arity,Fail,[Term,Arity]}, - {get_tuple_element,Term,0,Tmp}, - {test,is_eq_exact,Fail,[Tmp,TagAtom]}|Is0] - end, - renumber_labels(Is, Acc, St); renumber_labels([{label,Old}|Is], [{label,New}|_]=Acc, #st{lmap=D0}=St) -> D = [{Old,New}|D0], renumber_labels(Is, Acc, St#st{lmap=D}); @@ -169,12 +125,6 @@ renumber_labels([I|Is], Acc, St0) -> renumber_labels(Is, [I|Acc], St0); renumber_labels([], Acc, St) -> {Acc,St}. -is_record_tuple({x,_}, _, _) -> maybe; -is_record_tuple({y,_}, _, _) -> maybe; -is_record_tuple({literal,Tuple}, Tag, Arity) - when element(1, Tuple) =:= Tag, tuple_size(Tuple) =:= Arity -> yes; -is_record_tuple(_, _, _) -> no. - function_replace([{function,Name,Arity,Entry,Asm0}|Fs], Dict, Acc) -> Asm = try Fb = fun(Old) -> throw({error,{undefined_label,Old}}) end, @@ -188,91 +138,6 @@ function_replace([{function,Name,Arity,Entry,Asm0}|Fs], Dict, Acc) -> function_replace(Fs, Dict, [{function,Name,Arity,Entry,Asm}|Acc]); function_replace([], _, Acc) -> Acc. -%%% -%%% Final fixup of bs_start_match2/5,bs_save2/bs_restore2 instructions for -%%% new bit syntax matching (introduced in R11B). -%%% -%%% Pass 1: Scan the code, looking for bs_restore2/2 instructions. -%%% -%%% Pass 2: Update bs_save2/2 and bs_restore/2 instructions. Remove -%%% any bs_save2/2 instruction whose save position are never referenced -%%% by any bs_restore2/2 instruction. -%%% -%%% Note this module can be invoked several times, so we must be careful -%%% not to touch instructions that have already been fixed up. -%%% - -bs_fix(Fs) -> - bs_fix(Fs, []). - -bs_fix([{function,Name,Arity,Entry,Asm0}|Fs], Acc) -> - Asm = bs_function(Asm0), - bs_fix(Fs, [{function,Name,Arity,Entry,Asm}|Acc]); -bs_fix([], Acc) -> reverse(Acc). - -bs_function(Is) -> - Dict0 = bs_restores(Is, []), - S0 = sofs:relation(Dict0, [{context,save_point}]), - S1 = sofs:relation_to_family(S0), - S = sofs:to_external(S1), - Dict = make_save_point_dict(S, []), - bs_replace(Is, Dict, []). - -make_save_point_dict([{Ctx,Pts}|T], Acc0) -> - Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0), - make_save_point_dict(T, Acc); -make_save_point_dict([], Acc) -> - gb_trees:from_orddict(ordsets:from_list(Acc)). - -make_save_point_dict_1([H|T], Ctx, I, Acc) -> - make_save_point_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]); -make_save_point_dict_1([], Ctx, I, Acc) -> - [{Ctx,I}|Acc]. - -%% Pass 1. -bs_restores([{bs_restore2,_,{Same,Same}}|Is], Dict) -> - %% This save point is special. No explicit save is needed. - bs_restores(Is, Dict); -bs_restores([{bs_restore2,_,{atom,start}}|Is], Dict) -> - %% This instruction can occur if "compilation" - %% started from a .S file. - bs_restores(Is, Dict); -bs_restores([{bs_restore2,_,{_,_}=SavePoint}|Is], Dict) -> - bs_restores(Is, [SavePoint|Dict]); -bs_restores([_|Is], Dict) -> - bs_restores(Is, Dict); -bs_restores([], Dict) -> Dict. - -%% Pass 2. -bs_replace([{test,bs_start_match2,F,Live,[Src,{context,Ctx}],CtxR}|T], Dict, Acc) -> - Slots = case gb_trees:lookup(Ctx, Dict) of - {value,Slots0} -> Slots0; - none -> 0 - end, - I = {test,bs_start_match2,F,Live,[Src,Slots],CtxR}, - bs_replace(T, Dict, [I|Acc]); -bs_replace([{bs_save2,CtxR,{_,_}=SavePoint}|T], Dict, Acc) -> - case gb_trees:lookup(SavePoint, Dict) of - {value,N} -> - bs_replace(T, Dict, [{bs_save2,CtxR,N}|Acc]); - none -> - bs_replace(T, Dict, Acc) - end; -bs_replace([{bs_restore2,_,{atom,start}}=I|T], Dict, Acc) -> - %% This instruction can occur if "compilation" - %% started from a .S file. - bs_replace(T, Dict, [I|Acc]); -bs_replace([{bs_restore2,CtxR,{Same,Same}}|T], Dict, Acc) -> - %% This save point refers to the point in the binary where the match - %% started. It has a special name. - bs_replace(T, Dict, [{bs_restore2,CtxR,{atom,start}}|Acc]); -bs_replace([{bs_restore2,CtxR,{_,_}=SavePoint}|T], Dict, Acc) -> - N = gb_trees:get(SavePoint, Dict), - bs_replace(T, Dict, [{bs_restore2,CtxR,N}|Acc]); -bs_replace([I|Is], Dict, Acc) -> - bs_replace(Is, Dict, [I|Acc]); -bs_replace([], _, Acc) -> reverse(Acc). - %%% %%% Remove line instructions if requested. %%% diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl index 1004f6048b..546f0461b9 100644 --- a/lib/compiler/src/beam_dead.erl +++ b/lib/compiler/src/beam_dead.erl @@ -325,25 +325,11 @@ backward([{test,Op,{f,To0},Ops0}|Is], D, Acc) -> is_eq_exact -> combine_eqs(To, Ops0, D, Acc); _ -> {test,Op,{f,To},Ops0} end, - case {I,Acc} of - {{test,is_atom,Fail,Ops0},[{test,is_boolean,Fail,Ops0}|_]} -> - %% An is_atom test before an is_boolean test (with the - %% same failure label) is redundant. - backward(Is, D, Acc); - {{test,is_atom,Fail,[R]}, - [{test,is_eq_exact,Fail,[R,{atom,_}]}|_]} -> - %% An is_atom test before a comparison with an atom (with - %% the same failure label) is redundant. - backward(Is, D, Acc); - {{test,is_integer,Fail,[R]}, - [{test,is_eq_exact,Fail,[R,{integer,_}]}|_]} -> - %% An is_integer test before a comparison with an integer - %% (with the same failure label) is redundant. - backward(Is, D, Acc); - {{test,_,_,_},_} -> + case I of + {test,_,_,_} -> %% Still a test instruction. Done. backward(Is, D, [I|Acc]); - {_,_} -> + _ -> %% Rewritten to a select_val. Rescan. backward([I|Is], D, Acc) end; diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl index 20bd23a912..9a1c5a1021 100644 --- a/lib/compiler/src/beam_flatten.erl +++ b/lib/compiler/src/beam_flatten.erl @@ -43,13 +43,15 @@ block([{block,Is0}|Is1], Acc) -> block(Is1, norm_block(Is0, Acc)); block([I|Is], Acc) -> block(Is, [I|Acc]); block([], Acc) -> reverse(Acc). -norm_block([{set,[],[],{alloc,R,Alloc}}|Is], Acc0) -> +norm_block([{set,[],[],{alloc,R,{_,nostack,_,_}=Alloc}}|Is], Acc0) -> case insert_alloc_in_bs_init(Acc0, Alloc) of impossible -> norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0)); Acc -> norm_block(Is, Acc) end; +norm_block([{set,[],[],{alloc,R,Alloc}}|Is], Acc0) -> + norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0)); norm_block([{set,[D1],[S],get_hd},{set,[D2],[S],get_tl}|Is], Acc) -> I = {get_list,S,D1,D2}, norm_block(Is, [I|Acc]); diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl index 43084ad588..42b4cdaf4f 100644 --- a/lib/compiler/src/beam_jump.erl +++ b/lib/compiler/src/beam_jump.erl @@ -610,6 +610,12 @@ ulbl({put_map,Lbl,_Op,_Src,_Dst,_Live,_List}, Used) -> mark_used(Lbl, Used); ulbl({get_map_elements,Lbl,_Src,_List}, Used) -> mark_used(Lbl, Used); +ulbl({recv_mark,Lbl}, Used) -> + mark_used(Lbl, Used); +ulbl({recv_set,Lbl}, Used) -> + mark_used(Lbl, Used); +ulbl({fcheckerror,Lbl}, Used) -> + mark_used(Lbl, Used); ulbl(_, Used) -> Used. mark_used({f,0}, Used) -> Used; diff --git a/lib/compiler/src/beam_kernel_to_ssa.erl b/lib/compiler/src/beam_kernel_to_ssa.erl new file mode 100644 index 0000000000..c55a57ab32 --- /dev/null +++ b/lib/compiler/src/beam_kernel_to_ssa.erl @@ -0,0 +1,1330 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Purpose: Convert the Kernel Erlang format to the SSA format. + +-module(beam_kernel_to_ssa). + +%% The main interface. +-export([module/2]). + +-import(lists, [append/1,duplicate/2,flatmap/2,foldl/3, + keysort/2,mapfoldl/3,map/2,member/2, + reverse/1,reverse/2,sort/1]). + +-include("v3_kernel.hrl"). +-include("beam_ssa.hrl"). + +-type label() :: beam_ssa:label(). + +%% Main codegen structure. +-record(cg, {lcount=1 :: label(), %Label counter + bfail=1 :: label(), + catch_label=none :: 'none' | label(), + vars=#{} :: map(), %Defined variables. + break=0 :: label(), %Break label + recv=0 :: label(), %Receive label + ultimate_failure=0 :: label() %Label for ultimate match failure. + }). + +%% Internal records. +-record(cg_break, {args :: [beam_ssa:value()], + phi :: label() + }). +-record(cg_phi, {vars :: [beam_ssa:b_var()] + }). +-record(cg_unreachable, {}). + +-spec module(#k_mdef{}, [compile:option()]) -> {'ok',#b_module{}}. + +module(#k_mdef{name=Mod,exports=Es,attributes=Attr,body=Forms}, _Opts) -> + Body = functions(Forms, Mod), + Module = #b_module{name=Mod,exports=Es,attributes=Attr,body=Body}, + {ok,Module}. + +functions(Forms, Mod) -> + [function(F, Mod) || F <- Forms]. + +function(#k_fdef{anno=Anno0,func=Name,arity=Arity, + vars=As0,body=Kb}, Mod) -> + try + #k_match{} = Kb, %Assertion. + + %% Generate the SSA form immediate format. + St0 = #cg{}, + {As,St1} = new_ssa_vars(As0, St0), + {Asm,St} = cg_fun(Kb, St1), + Anno1 = line_anno(Anno0), + Anno = Anno1#{func_info=>{Mod,Name,Arity}}, + #b_function{anno=Anno,args=As,bs=Asm,cnt=St#cg.lcount} + catch + Class:Error:Stack -> + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +%% cg_fun([Lkexpr], [HeadVar], State) -> {[Ainstr],State} + +cg_fun(Ke, St0) -> + {UltimateFail,FailIs,St1} = make_failure(badarg, St0), + St2 = St1#cg{bfail=UltimateFail,ultimate_failure=UltimateFail}, + {B,St} = cg(Ke, St2), + Asm = [{label,0}|B++FailIs], + finalize(Asm, St). + +make_failure(Reason, St0) -> + {Lbl,St1} = new_label(St0), + {Dst,St} = new_ssa_var('@ssa_ret', St1), + Is = [{label,Lbl}, + #b_set{op=call,dst=Dst, + args=[#b_remote{mod=#b_literal{val=erlang}, + name=#b_literal{val=error}, + arity=1}, + #b_literal{val=Reason}]}, + #b_ret{arg=Dst}], + {Lbl,Is,St}. + +%% cg(Lkexpr, State) -> {[Ainstr],State}. +%% Generate code for a kexpr. + +cg(#k_match{body=M,ret=Rs}, St) -> + do_match_cg(M, Rs, St); +cg(#k_guard_match{body=M,ret=Rs}, St) -> + do_match_cg(M, Rs, St); +cg(#k_seq{arg=Arg,body=Body}, St0) -> + {ArgIs,St1} = cg(Arg, St0), + {BodyIs,St} = cg(Body, St1), + {ArgIs++BodyIs,St}; +cg(#k_call{anno=Le,op=Func,args=As,ret=Rs}, St) -> + call_cg(Func, As, Rs, Le, St); +cg(#k_enter{anno=Le,op=Func,args=As}, St) -> + enter_cg(Func, As, Le, St); +cg(#k_bif{anno=Le}=Bif, St) -> + bif_cg(Bif, Le, St); +cg(#k_try{arg=Ta,vars=Vs,body=Tb,evars=Evs,handler=Th,ret=Rs}, St) -> + try_cg(Ta, Vs, Tb, Evs, Th, Rs, St); +cg(#k_try_enter{arg=Ta,vars=Vs,body=Tb,evars=Evs,handler=Th}, St) -> + try_enter_cg(Ta, Vs, Tb, Evs, Th, St); +cg(#k_catch{body=Cb,ret=[R]}, St) -> + do_catch_cg(Cb, R, St); +cg(#k_receive{anno=Le,timeout=Te,var=Rvar,body=Rm,action=Tes,ret=Rs}, St) -> + recv_loop_cg(Te, Rvar, Rm, Tes, Rs, Le, St); +cg(#k_receive_next{}, #cg{recv=Recv}=St) -> + Is = [#b_set{op=recv_next},make_uncond_branch(Recv)], + {Is,St}; +cg(#k_receive_accept{}, St) -> + Remove = #b_set{op=remove_message}, + {[Remove],St}; +cg(#k_put{anno=Le,arg=Con,ret=Var}, St) -> + put_cg(Var, Con, Le, St); +cg(#k_return{args=[Ret0]}, St) -> + Ret = ssa_arg(Ret0, St), + {[#b_ret{arg=Ret}],St}; +cg(#k_break{args=Bs}, #cg{break=Br}=St) -> + Args = ssa_args(Bs, St), + {[#cg_break{args=Args,phi=Br}],St}; +cg(#k_guard_break{args=Bs}, St) -> + cg(#k_break{args=Bs}, St). + +%% match_cg(Matc, [Ret], State) -> {[Ainstr],State}. +%% Generate code for a match. + +do_match_cg(M, Rs, St0) -> + {B,St1} = new_label(St0), + {Mis,St2} = match_cg(M, St1#cg.bfail, St1#cg{break=B}), + {BreakVars,St} = new_ssa_vars(Rs, St2), + {Mis ++ [{label,B},#cg_phi{vars=BreakVars}], + St#cg{bfail=St0#cg.bfail,break=St1#cg.break}}. + +%% match_cg(Match, Fail, State) -> {[Ainstr],State}. +%% Generate code for a match tree. + +match_cg(#k_alt{first=F,then=S}, Fail, St0) -> + {Tf,St1} = new_label(St0), + {Fis,St2} = match_cg(F, Tf, St1), + {Sis,St3} = match_cg(S, Fail, St2), + {Fis ++ [{label,Tf}] ++ Sis,St3}; +match_cg(#k_select{var=#k_var{}=V,types=Scs}, Fail, St) -> + match_fmf(fun (S, F, Sta) -> + select_cg(S, V, F, Fail, Sta) + end, Fail, St, Scs); +match_cg(#k_guard{clauses=Gcs}, Fail, St) -> + match_fmf(fun (G, F, Sta) -> + guard_clause_cg(G, F, Sta) + end, Fail, St, Gcs); +match_cg(Ke, _Fail, St0) -> + cg(Ke, St0). + +%% select_cg(Sclause, V, TypeFail, ValueFail, State) -> {Is,State}. +%% Selecting type and value needs two failure labels, TypeFail is the +%% label to jump to of the next type test when this type fails, and +%% ValueFail is the label when this type is correct but the value is +%% wrong. These are different as in the second case there is no need +%% to try the next type, it will always fail. + +select_cg(#k_type_clause{type=k_binary,values=[S]}, Var, Tf, Vf, St) -> + select_binary(S, Var, Tf, Vf, St); +select_cg(#k_type_clause{type=k_bin_seg,values=Vs}, Var, Tf, _Vf, St) -> + select_bin_segs(Vs, Var, Tf, St); +select_cg(#k_type_clause{type=k_bin_int,values=Vs}, Var, Tf, _Vf, St) -> + select_bin_segs(Vs, Var, Tf, St); +select_cg(#k_type_clause{type=k_bin_end,values=[S]}, Var, Tf, _Vf, St) -> + select_bin_end(S, Var, Tf, St); +select_cg(#k_type_clause{type=k_map,values=Vs}, Var, Tf, Vf, St) -> + select_map(Vs, Var, Tf, Vf, St); +select_cg(#k_type_clause{type=k_cons,values=[S]}, Var, Tf, Vf, St) -> + select_cons(S, Var, Tf, Vf, St); +select_cg(#k_type_clause{type=k_nil,values=[S]}, Var, Tf, Vf, St) -> + select_nil(S, Var, Tf, Vf, St); +select_cg(#k_type_clause{type=k_literal,values=Vs}, Var, Tf, Vf, St) -> + select_literal(Vs, Var, Tf, Vf, St); +select_cg(#k_type_clause{type=Type,values=Scs}, Var, Tf, Vf, St0) -> + {Vis,St1} = + mapfoldl(fun (S, Sta) -> + {Val,Is,Stb} = select_val(S, Var, Vf, Sta), + {{Is,[Val]},Stb} + end, St0, Scs), + OptVls = combine(lists:sort(combine(Vis))), + {Vls,Sis,St2} = select_labels(OptVls, St1, [], []), + Arg = ssa_arg(Var, St2), + {Is,St} = select_val_cg(Type, Arg, Vls, Tf, Vf, Sis, St2), + {Is,St}. + +select_val_cg(k_tuple, Tuple, Vls, Tf, Vf, Sis, St0) -> + {Is0,St1} = make_cond_branch({bif,is_tuple}, [Tuple], Tf, St0), + {Arity,St2} = new_ssa_var('@ssa_arity', St1), + GetArity = #b_set{op={bif,tuple_size},dst=Arity,args=[Tuple]}, + {Is,St} = select_val_cg(k_int, Arity, Vls, Vf, Vf, Sis, St2), + {Is0++[GetArity]++Is,St}; +select_val_cg(Type, R, Vls, Tf, Vf, Sis, St0) -> + {TypeIs,St1} = if + Tf =:= Vf -> + %% The type and value failure labels are the same; + %% we don't need a type test. + {[],St0}; + true -> + %% Different labels for type failure and + %% label failure; we need a type test. + Test = select_type_test(Type), + make_cond_branch(Test, [R], Tf, St0) + end, + case Vls of + [{Val,Succ}] -> + {Is,St} = make_cond({bif,'=:='}, [R,Val], Vf, Succ, St1), + {TypeIs++Is++Sis,St}; + [_|_] -> + {TypeIs++[#b_switch{arg=R,fail=Vf,list=Vls}|Sis],St1} + end. + +select_type_test(k_int) -> {bif,is_integer}; +select_type_test(k_atom) -> {bif,is_atom}; +select_type_test(k_float) -> {bif,is_float}. + +combine([{Is,Vs1},{Is,Vs2}|Vis]) -> combine([{Is,Vs1 ++ Vs2}|Vis]); +combine([V|Vis]) -> [V|combine(Vis)]; +combine([]) -> []. + +select_labels([{Is,Vs}|Vis], St0, Vls, Sis) -> + {Lbl,St1} = new_label(St0), + select_labels(Vis, St1, add_vls(Vs, Lbl, Vls), [[{label,Lbl}|Is]|Sis]); +select_labels([], St, Vls, Sis) -> + {Vls,append(Sis),St}. + +add_vls([V|Vs], Lbl, Acc) -> + add_vls(Vs, Lbl, [{#b_literal{val=V},Lbl}|Acc]); +add_vls([], _, Acc) -> Acc. + +select_literal(S, V, Tf, Vf, St) -> + Src = ssa_arg(V, St), + F = fun(ValClause, Fail, St0) -> + {Val,ValIs,St1} = select_val(ValClause, V, Vf, St0), + Args = [Src,#b_literal{val=Val}], + {Is,St2} = make_cond_branch({bif,'=:='}, Args, Fail, St1), + {Is++ValIs,St2} + end, + match_fmf(F, Tf, St, S). + +select_cons(#k_val_clause{val=#k_cons{hd=Hd,tl=Tl},body=B}, + V, Tf, Vf, St0) -> + Es = [Hd,Tl], + {Eis,St1} = select_extract_cons(V, Es, St0), + {Bis,St2} = match_cg(B, Vf, St1), + Src = ssa_arg(V, St2), + {Is,St} = make_cond_branch(is_nonempty_list, [Src], Tf, St2), + {Is ++ Eis ++ Bis,St}. + +select_nil(#k_val_clause{val=#k_nil{},body=B}, V, Tf, Vf, St0) -> + {Bis,St1} = match_cg(B, Vf, St0), + Src = ssa_arg(V, St1), + {Is,St} = make_cond_branch({bif,'=:='}, [Src,#b_literal{val=[]}], Tf, St1), + {Is ++ Bis,St}. + +select_binary(#k_val_clause{val=#k_binary{segs=#k_var{name=Ctx0}},body=B}, + #k_var{anno=Anno0}=Src, Tf, Vf, St0) -> + Anno = #{reuse_for_context=>member(reuse_for_context, Anno0)}, + {Ctx,St1} = new_ssa_var(Ctx0, St0), + {Bis0,St2} = match_cg(B, Vf, St1), + {TestIs,St} = make_cond_branch(succeeded, [Ctx], Tf, St2), + Bis1 = [#b_set{anno=Anno,op=bs_start_match,dst=Ctx, + args=[ssa_arg(Src, St)]}] ++ TestIs ++ Bis0, + Bis = finish_bs_matching(Bis1), + {Bis,St}. + +finish_bs_matching([#b_set{op=bs_match, + args=[#b_literal{val=string},Ctx,#b_literal{val=BinList}]}=Set|Is]) + when is_list(BinList) -> + I = Set#b_set{args=[#b_literal{val=string},Ctx, + #b_literal{val=list_to_bitstring(BinList)}]}, + finish_bs_matching([I|Is]); +finish_bs_matching([I|Is]) -> + [I|finish_bs_matching(Is)]; +finish_bs_matching([]) -> []. + +make_cond(Cond, Args, Fail, Succ, St0) -> + {Bool,St} = new_ssa_var('@ssa_bool', St0), + Bif = #b_set{op=Cond,dst=Bool,args=Args}, + Br = #b_br{bool=Bool,succ=Succ,fail=Fail}, + {[Bif,Br],St}. + +make_cond_branch(Cond, Args, Fail, St0) -> + {Bool,St1} = new_ssa_var('@ssa_bool', St0), + {Succ,St} = new_label(St1), + Bif = #b_set{op=Cond,dst=Bool,args=Args}, + Br = #b_br{bool=Bool,succ=Succ,fail=Fail}, + {[Bif,Br,{label,Succ}],St}. + +make_uncond_branch(Fail) -> + #b_br{bool=#b_literal{val=true},succ=Fail,fail=Fail}. + +%% Instructions for selection of binary segments. + +select_bin_segs(Scs, Ivar, Tf, St) -> + match_fmf(fun(S, Fail, Sta) -> + select_bin_seg(S, Ivar, Fail, Sta) + end, Tf, St, Scs). + +select_bin_seg(#k_val_clause{val=#k_bin_seg{size=Size,unit=U,type=T, + seg=Seg,flags=Fs,next=Next}, + body=B,anno=Anno}, + #k_var{}=Src, Fail, St0) -> + LineAnno = line_anno(Anno), + Ctx = get_context(Src, St0), + {Mis,St1} = select_extract_bin(Next, Size, U, T, Fs, Fail, + Ctx, LineAnno, St0), + {Extracted,St2} = new_ssa_var(Seg#k_var.name, St1), + {Bis,St} = bin_match_cg(Size, B, Fail, St2), + BsGet = #b_set{op=bs_extract,dst=Extracted,args=[ssa_arg(Next, St)]}, + Is = Mis ++ [BsGet] ++ Bis, + {Is,St}; +select_bin_seg(#k_val_clause{val=#k_bin_int{size=Sz,unit=U,flags=Fs, + val=Val,next=Next}, + body=B}, + #k_var{}=Src, Fail, St0) -> + Ctx = get_context(Src, St0), + {Mis,St1} = select_extract_int(Next, Val, Sz, U, Fs, Fail, + Ctx, St0), + {Bis,St} = match_cg(B, Fail, St1), + Is = case Mis ++ Bis of + [#b_set{op=bs_match,args=[#b_literal{val=string},OtherCtx1,Bin1]}, + #b_set{op=succeeded,dst=Bool1}, + #b_br{bool=Bool1,succ=Succ,fail=Fail}, + {label,Succ}, + #b_set{op=bs_match,dst=Dst,args=[#b_literal{val=string},_OtherCtx2,Bin2]}| + [#b_set{op=succeeded,dst=Bool2}, + #b_br{bool=Bool2,fail=Fail}|_]=Is0] -> + %% We used to do this optimization later, but it + %% turns out that in huge functions with many + %% string matching instructions, it's a huge win + %% to do the combination now. To avoid copying the + %% binary data again and again, we'll combine bitstrings + %% in a list and convert all of it to a bitstring later. + {#b_literal{val=B1},#b_literal{val=B2}} = {Bin1,Bin2}, + Bin = #b_literal{val=[B1,B2]}, + Set = #b_set{op=bs_match,dst=Dst,args=[#b_literal{val=string},OtherCtx1,Bin]}, + [Set|Is0]; + Is0 -> + Is0 + end, + {Is,St}. + +bin_match_cg(#k_atom{val=all}, B0, Fail, St) -> + #k_select{types=Types} = B0, + [#k_type_clause{type=k_bin_end,values=Values}] = Types, + [#k_val_clause{val=#k_bin_end{},body=B}] = Values, + match_cg(B, Fail, St); +bin_match_cg(_, B, Fail, St) -> + match_cg(B, Fail, St). + +get_context(#k_var{}=Var, St) -> + ssa_arg(Var, St). + +select_bin_end(#k_val_clause{val=#k_bin_end{},body=B}, Src, Tf, St0) -> + Ctx = get_context(Src, St0), + {Bis,St1} = match_cg(B, Tf, St0), + {TestIs,St} = make_cond_branch(bs_test_tail, [Ctx,#b_literal{val=0}], Tf, St1), + Is = TestIs++Bis, + {Is,St}. + +select_extract_bin(#k_var{name=Hd}, Size0, Unit, Type, Flags, Vf, + Ctx, Anno, St0) -> + {Dst,St1} = new_ssa_var(Hd, St0), + Size = ssa_arg(Size0, St0), + build_bs_instr(Anno, Type, Vf, Ctx, Size, Unit, Flags, Dst, St1). + +select_extract_int(#k_var{name=Tl}, 0, #k_int{val=0}, _U, _Fs, _Vf, + Ctx, St0) -> + St = set_ssa_var(Tl, Ctx, St0), + {[],St}; +select_extract_int(#k_var{name=Tl}, Val, #k_int{val=Sz}, U, Fs, Vf, + Ctx, St0) -> + {Dst,St1} = new_ssa_var(Tl, St0), + Bits = U*Sz, + Bin = case member(big, Fs) of + true -> + <>; + false -> + true = member(little, Fs), %Assertion. + <> + end, + Bits = bit_size(Bin), %Assertion. + {TestIs,St} = make_cond_branch(succeeded, [Dst], Vf, St1), + Set = #b_set{op=bs_match,dst=Dst, + args=[#b_literal{val=string},Ctx,#b_literal{val=Bin}]}, + {[Set|TestIs],St}. + +build_bs_instr(Anno, Type, Fail, Ctx, Size, Unit0, Flags0, Dst, St0) -> + Unit = #b_literal{val=Unit0}, + Flags = #b_literal{val=Flags0}, + NeedSize = bs_need_size(Type), + TypeArg = #b_literal{val=Type}, + Get = case NeedSize of + true -> + #b_set{anno=Anno,op=bs_match,dst=Dst, + args=[TypeArg,Ctx,Flags,Size,Unit]}; + false -> + #b_set{anno=Anno,op=bs_match,dst=Dst, + args=[TypeArg,Ctx,Flags]} + end, + {Is,St} = make_cond_branch(succeeded, [Dst], Fail, St0), + {[Get|Is],St}. + +select_val(#k_val_clause{val=#k_tuple{es=Es},body=B}, V, Vf, St0) -> + #k{us=Used} = k_get_anno(B), + {Eis,St1} = select_extract_tuple(V, Es, Used, St0), + {Bis,St2} = match_cg(B, Vf, St1), + {length(Es),Eis ++ Bis,St2}; +select_val(#k_val_clause{val=Val0,body=B}, _V, Vf, St0) -> + Val = case Val0 of + #k_atom{val=Lit} -> Lit; + #k_float{val=Lit} -> Lit; + #k_int{val=Lit} -> Lit; + #k_literal{val=Lit} -> Lit + end, + {Bis,St1} = match_cg(B, Vf, St0), + {Val,Bis,St1}. + +%% select_extract_tuple(Src, [V], State) -> {[E],State}. +%% Extract tuple elements, but only if they are actually used. +%% +%% Not extracting tuple elements that are not used is an +%% optimization for compile time and memory use during compilation. +%% It is probably worthwhile because it is common to extract only a +%% few elements from a huge record. + +select_extract_tuple(Src, Vs, Used, St0) -> + Tuple = ssa_arg(Src, St0), + F = fun (#k_var{name=V}, {Elem,S0}) -> + case member(V, Used) of + true -> + Args = [Tuple,#b_literal{val=Elem}], + {Dst,S} = new_ssa_var(V, S0), + Get = #b_set{op=get_tuple_element,dst=Dst,args=Args}, + {[Get],{Elem+1,S}}; + false -> + {[],{Elem+1,S0}} + end + end, + {Es,{_,St}} = flatmapfoldl(F, {0,St0}, Vs), + {Es,St}. + +select_map(Scs, V, Tf, Vf, St0) -> + MapSrc = ssa_arg(V, St0), + {Is,St1} = + match_fmf(fun(#k_val_clause{val=#k_map{op=exact,es=Es}, + body=B}, Fail, St1) -> + select_map_val(V, Es, B, Fail, St1) + end, Vf, St0, Scs), + {TestIs,St} = make_cond_branch({bif,is_map}, [MapSrc], Tf, St1), + {TestIs++Is,St}. + +select_map_val(V, Es, B, Fail, St0) -> + {Eis,St1} = select_extract_map(Es, V, Fail, St0), + {Bis,St2} = match_cg(B, Fail, St1), + {Eis++Bis,St2}. + +select_extract_map([P|Ps], Src, Fail, St0) -> + MapSrc = ssa_arg(Src, St0), + #k_map_pair{key=Key0,val=#k_var{name=Dst0}} = P, + Key = ssa_arg(Key0, St0), + {Dst,St1} = new_ssa_var(Dst0, St0), + Set = #b_set{op=get_map_element,dst=Dst,args=[MapSrc,Key]}, + {TestIs,St2} = make_cond_branch(succeeded, [Dst], Fail, St1), + {Is,St} = select_extract_map(Ps, Src, Fail, St2), + {[Set|TestIs]++Is,St}; +select_extract_map([], _, _, St) -> + {[],St}. + +select_extract_cons(Src0, [#k_var{name=Hd},#k_var{name=Tl}], St0) -> + Src = ssa_arg(Src0, St0), + {HdDst,St1} = new_ssa_var(Hd, St0), + {TlDst,St2} = new_ssa_var(Tl, St1), + GetHd = #b_set{op=get_hd,dst=HdDst,args=[Src]}, + GetTl = #b_set{op=get_tl,dst=TlDst,args=[Src]}, + {[GetHd,GetTl],St2}. + +guard_clause_cg(#k_guard_clause{guard=G,body=B}, Fail, St0) -> + {Gis,St1} = guard_cg(G, Fail, St0), + {Bis,St} = match_cg(B, Fail, St1), + {Gis ++ Bis,St}. + +%% guard_cg(Guard, Fail, State) -> {[Ainstr],State}. +%% A guard is a boolean expression of tests. Tests return true or +%% false. A fault in a test causes the test to return false. Tests +%% never return the boolean, instead we generate jump code to go to +%% the correct exit point. Primops and tests all go to the next +%% instruction on success or jump to a failure label. + +guard_cg(#k_protected{arg=Ts,ret=Rs,inner=Inner}, Fail, St) -> + protected_cg(Ts, Rs, Inner, Fail, St); +guard_cg(#k_test{op=Test0,args=As,inverted=Inverted}, Fail, St0) -> + #k_remote{mod=#k_atom{val=erlang},name=#k_atom{val=Test}} = Test0, + test_cg(Test, Inverted, As, Fail, St0); +guard_cg(#k_seq{arg=Arg,body=Body}, Fail, St0) -> + {ArgIs,St1} = guard_cg(Arg, Fail, St0), + {BodyIs,St} = guard_cg(Body, Fail, St1), + {ArgIs++BodyIs,St}; +guard_cg(G, _Fail, St) -> + cg(G, St). + +test_cg('=/=', Inverted, As, Fail, St) -> + test_cg('=:=', not Inverted, As, Fail, St); +test_cg('/=', Inverted, As, Fail, St) -> + test_cg('==', not Inverted, As, Fail, St); +test_cg(Test, Inverted, As0, Fail, St0) -> + As = ssa_args(As0, St0), + case {Test,ssa_args(As0, St0)} of + {is_record,[Tuple,#b_literal{val=Atom}=Tag,#b_literal{val=Int}=Arity]} + when is_atom(Atom), is_integer(Int) -> + test_is_record_cg(Inverted, Fail, Tuple, Tag, Arity, St0); + {_,As} -> + {Bool,St1} = new_ssa_var('@ssa_bool', St0), + {Succ,St} = new_label(St1), + Bif = #b_set{op={bif,Test},dst=Bool,args=As}, + Br = case Inverted of + false -> #b_br{bool=Bool,succ=Succ,fail=Fail}; + true -> #b_br{bool=Bool,succ=Fail,fail=Succ} + end, + {[Bif,Br,{label,Succ}],St} + end. + +test_is_record_cg(false, Fail, Tuple, TagVal, ArityVal, St0) -> + {Arity,St1} = new_ssa_var('@ssa_arity', St0), + {Tag,St2} = new_ssa_var('@ssa_tag', St1), + {Is0,St3} = make_cond_branch({bif,is_tuple}, [Tuple], Fail, St2), + GetArity = #b_set{op={bif,tuple_size},dst=Arity,args=[Tuple]}, + {Is1,St4} = make_cond_branch({bif,'=:='}, [Arity,ArityVal], Fail, St3), + GetTag = #b_set{op=get_tuple_element,dst=Tag, + args=[Tuple,#b_literal{val=0}]}, + {Is2,St} = make_cond_branch({bif,'=:='}, [Tag,TagVal], Fail, St4), + Is = Is0 ++ [GetArity] ++ Is1 ++ [GetTag] ++ Is2, + {Is,St}; +test_is_record_cg(true, Fail, Tuple, TagVal, ArityVal, St0) -> + {Succ,St1} = new_label(St0), + {Arity,St2} = new_ssa_var('@ssa_arity', St1), + {Tag,St3} = new_ssa_var('@ssa_tag', St2), + {Is0,St4} = make_cond_branch({bif,is_tuple}, [Tuple], Succ, St3), + GetArity = #b_set{op={bif,tuple_size},dst=Arity,args=[Tuple]}, + {Is1,St5} = make_cond_branch({bif,'=:='}, [Arity,ArityVal], Succ, St4), + GetTag = #b_set{op=get_tuple_element,dst=Tag, + args=[Tuple,#b_literal{val=0}]}, + {Is2,St} = make_cond_branch({bif,'=:='}, [Tag,TagVal], Succ, St5), + Is3 = [make_uncond_branch(Fail),{label,Succ}], + Is = Is0 ++ [GetArity] ++ Is1 ++ [GetTag] ++ Is2 ++ Is3, + {Is,St}. + +%% protected_cg([Kexpr], [Ret], Fail, St) -> {[Ainstr],St}. +%% Do a protected. Protecteds without return values are just done +%% for effect, the return value is not checked, success passes on to +%% the next instruction and failure jumps to Fail. If there are +%% return values then these must be set to 'false' on failure, +%% control always passes to the next instruction. + +protected_cg(Ts, [], _, Fail, St0) -> + %% Protect these calls, revert when done. + {Tis,St1} = guard_cg(Ts, Fail, St0#cg{bfail=Fail}), + {Tis,St1#cg{bfail=St0#cg.bfail}}; +protected_cg(Ts, Rs, Inner0, _Fail, St0) -> + {Pfail,St1} = new_label(St0), + {Br,St2} = new_label(St1), + Prot = duplicate(length(Rs), #b_literal{val=false}), + {Tis,St3} = guard_cg(Ts, Pfail, St2#cg{break=Pfail,bfail=Pfail}), + Inner = ssa_args(Inner0, St3), + {BreakVars,St} = new_ssa_vars(Rs, St3), + Is = Tis ++ [#cg_break{args=Inner,phi=Br}, + {label,Pfail},#cg_break{args=Prot,phi=Br}, + {label,Br},#cg_phi{vars=BreakVars}], + {Is,St#cg{break=St0#cg.break,bfail=St0#cg.bfail}}. + +%% match_fmf(Fun, LastFail, State, [Clause]) -> {Is,State}. +%% This is a special flatmapfoldl for match code gen where we +%% generate a "failure" label for each clause. The last clause uses +%% an externally generated failure label, LastFail. N.B. We do not +%% know or care how the failure labels are used. + +match_fmf(F, LastFail, St, [H]) -> + F(H, LastFail, St); +match_fmf(F, LastFail, St0, [H|T]) -> + {Fail,St1} = new_label(St0), + {R,St2} = F(H, Fail, St1), + {Rs,St3} = match_fmf(F, LastFail, St2, T), + {R ++ [{label,Fail}] ++ Rs,St3}. + +%% fail_label(State) -> {Where,FailureLabel}. +%% Where = guard | no_catch | in_catch +%% Return an indication of which part of a function code is +%% being generated for and the appropriate failure label to +%% use. +%% +%% Where has the following meaning: +%% +%% guard - Inside a guard. +%% no_catch - In a function body, not in the scope of +%% a try/catch or catch. +%% in_catch - In the scope of a try/catch or catch. + +fail_label(#cg{catch_label=Catch,bfail=Fail,ultimate_failure=Ult}) -> + if + Fail =/= Ult -> + {guard,Fail}; + Catch =:= none -> + {no_catch,Fail}; + is_integer(Catch) -> + {in_catch,Catch} + end. + +%% bif_fail_label(State) -> FailureLabel. +%% Return the appropriate failure label for a guard BIF call or +%% primop that fails. + +bif_fail_label(St) -> + {_,Fail} = fail_label(St), + Fail. + +%% call_cg(Func, [Arg], [Ret], Le, State) -> +%% {[Ainstr],State}. +%% enter_cg(Func, [Arg], Le, St) -> {[Ainstr],St}. +%% Generate code for call and enter. + +call_cg(Func, As, [], Le, St) -> + call_cg(Func, As, [#k_var{name='@ssa_ignored'}], Le, St); +call_cg(Func0, As, [#k_var{name=R}|MoreRs]=Rs, Le, St0) -> + case fail_label(St0) of + {guard,Fail} -> + %% Inside a guard. The only allowed function call is to + %% erlang:error/1,2. We will generate a branch to the + %% failure branch. + #k_remote{mod=#k_atom{val=erlang}, + name=#k_atom{val=error}} = Func0, %Assertion. + [#k_var{name=DestVar}] = Rs, + St = set_ssa_var(DestVar, #b_literal{val=unused}, St0), + {[make_uncond_branch(Fail),#cg_unreachable{}],St}; + {Catch,Fail} -> + %% Ordinary function call in a function body. + Args = ssa_args(As, St0), + {Ret,St1} = new_ssa_var(R, St0), + Func = call_target(Func0, Args, St0), + Call = #b_set{anno=line_anno(Le),op=call,dst=Ret,args=[Func|Args]}, + + %% If this is a call to erlang:error(), MoreRs could be a + %% nonempty list of variables that each need a value. + St2 = foldl(fun(#k_var{name=Dummy}, S) -> + set_ssa_var(Dummy, #b_literal{val=unused}, S) + end, St1, MoreRs), + case Catch of + no_catch -> + {[Call],St2}; + in_catch -> + {TestIs,St} = make_cond_branch(succeeded, [Ret], Fail, St2), + {[Call|TestIs],St} + end + end. + +enter_cg(Func0, As0, Le, St0) -> + Anno = line_anno(Le), + Func = call_target(Func0, As0, St0), + As = ssa_args(As0, St0), + {Ret,St} = new_ssa_var('@ssa_ret', St0), + Call = #b_set{anno=Anno,op=call,dst=Ret,args=[Func|As]}, + {[Call,#b_ret{arg=Ret}],St}. + +call_target(Func, As, St) -> + Arity = length(As), + case Func of + #k_remote{mod=Mod0,name=Name0} -> + Mod = ssa_arg(Mod0, St), + Name = ssa_arg(Name0, St), + #b_remote{mod=Mod,name=Name,arity=Arity}; + #k_local{name=Name} when is_atom(Name) -> + #b_local{name=#b_literal{val=Name},arity=Arity}; + #k_var{}=Var -> + ssa_arg(Var, St) + end. + +%% bif_cg(#k_bif{}, Le,State) -> {[Ainstr],State}. +%% Generate code for a guard BIF or primop. + +bif_cg(#k_bif{op=#k_internal{name=Name},args=As,ret=Rs}, Le, St) -> + internal_cg(Name, As, Rs, Le, St); +bif_cg(#k_bif{op=#k_remote{mod=#k_atom{val=erlang},name=#k_atom{val=Name}}, + args=As,ret=Rs}, Le, St) -> + bif_cg(Name, As, Rs, Le, St). + +%% internal_cg(Bif, [Arg], [Ret], Le, State) -> +%% {[Ainstr],State}. + +internal_cg(bs_context_to_binary, [Src0], [], _Le, St) -> + Src = ssa_arg(Src0, St), + Set = #b_set{op=context_to_binary,args=[Src]}, + {[Set],St}; +internal_cg(dsetelement, [Index0,Tuple0,New0], _Rs, _Le, St) -> + [New,Tuple,#b_literal{val=Index1}] = ssa_args([New0,Tuple0,Index0], St), + Index = #b_literal{val=Index1-1}, + Set = #b_set{op=set_tuple_element,args=[New,Tuple,Index]}, + {[Set],St}; +internal_cg(make_fun, [Name0,Arity0|As], Rs, _Le, St0) -> + #k_atom{val=Name} = Name0, + #k_int{val=Arity} = Arity0, + [#k_var{name=Dst0}] = Rs, + {Dst,St} = new_ssa_var(Dst0, St0), + Args = ssa_args(As, St), + Local = #b_local{name=#b_literal{val=Name},arity=Arity}, + MakeFun = #b_set{op=make_fun,dst=Dst,args=[Local|Args]}, + {[MakeFun],St}; +internal_cg(bs_init_writable=I, As, [#k_var{name=Dst0}], _Le, St0) -> + %% This behaves like a function call. + {Dst,St} = new_ssa_var(Dst0, St0), + Args = ssa_args(As, St), + Set = #b_set{op=I,dst=Dst,args=Args}, + {[Set],St}; +internal_cg(build_stacktrace=I, As, [#k_var{name=Dst0}], _Le, St0) -> + {Dst,St} = new_ssa_var(Dst0, St0), + Args = ssa_args(As, St), + Set = #b_set{op=I,dst=Dst,args=Args}, + {[Set],St}; +internal_cg(raise, As, [#k_var{name=Dst0}], _Le, St0) -> + Args = ssa_args(As, St0), + {Dst,St} = new_ssa_var(Dst0, St0), + Resume = #b_set{op=resume,dst=Dst,args=Args}, + case St of + #cg{catch_label=none} -> + {[Resume],St}; + #cg{catch_label=Catch} when is_integer(Catch) -> + Is = [Resume,make_uncond_branch(Catch),#cg_unreachable{}], + {Is,St} + end; +internal_cg(raw_raise=I, As, [#k_var{name=Dst0}], _Le, St0) -> + %% This behaves like a function call. + {Dst,St} = new_ssa_var(Dst0, St0), + Args = ssa_args(As, St), + Set = #b_set{op=I,dst=Dst,args=Args}, + {[Set],St}. + +bif_cg(Bif, As0, [#k_var{name=Dst0}], Le, St0) -> + {Dst,St1} = new_ssa_var(Dst0, St0), + case {Bif,ssa_args(As0, St0)} of + {is_record,[Tuple,#b_literal{val=Atom}=Tag, + #b_literal{val=Int}=Arity]} + when is_atom(Atom), is_integer(Int) -> + bif_is_record_cg(Dst, Tuple, Tag, Arity, St1); + {_,As} -> + I = #b_set{anno=line_anno(Le),op={bif,Bif},dst=Dst,args=As}, + case erl_bifs:is_safe(erlang, Bif, length(As)) of + false -> + Fail = bif_fail_label(St1), + {Is,St} = make_cond_branch(succeeded, [Dst], Fail, St1), + {[I|Is],St}; + true-> + {[I],St1} + end + end. + +bif_is_record_cg(Dst, Tuple, TagVal, ArityVal, St0) -> + {Arity,St1} = new_ssa_var('@ssa_arity', St0), + {Tag,St2} = new_ssa_var('@ssa_tag', St1), + {Phi,St3} = new_label(St2), + {False,St4} = new_label(St3), + {Is0,St5} = make_cond_branch({bif,is_tuple}, [Tuple], False, St4), + GetArity = #b_set{op={bif,tuple_size},dst=Arity,args=[Tuple]}, + {Is1,St6} = make_cond_branch({bif,'=:='}, [Arity,ArityVal], False, St5), + GetTag = #b_set{op=get_tuple_element,dst=Tag, + args=[Tuple,#b_literal{val=0}]}, + {Is2,St} = make_cond_branch({bif,'=:='}, [Tag,TagVal], False, St6), + Is3 = [#cg_break{args=[#b_literal{val=true}],phi=Phi}, + {label,False}, + #cg_break{args=[#b_literal{val=false}],phi=Phi}, + {label,Phi}, + #cg_phi{vars=[Dst]}], + Is = Is0 ++ [GetArity] ++ Is1 ++ [GetTag] ++ Is2 ++ Is3, + {Is,St}. + +%% recv_loop_cg(TimeOut, ReceiveVar, ReceiveMatch, TimeOutExprs, +%% [Ret], Le, St) -> {[Ainstr],St}. + +recv_loop_cg(Te, Rvar, Rm, Tes, Rs, Le, St0) -> + %% Get labels. + {Rl,St1} = new_label(St0), + {Tl,St2} = new_label(St1), + {Bl,St3} = new_label(St2), + St4 = St3#cg{break=Bl,recv=Rl}, + {Ris,St5} = cg_recv_mesg(Rvar, Rm, Tl, Le, St4), + {Wis,St6} = cg_recv_wait(Te, Tes, St5), + {BreakVars,St} = new_ssa_vars(Rs, St6), + {Ris ++ [{label,Tl}] ++ Wis ++ + [{label,Bl},#cg_phi{vars=BreakVars}], + St#cg{break=St0#cg.break,recv=St0#cg.recv}}. + +%% cg_recv_mesg( ) -> {[Ainstr],St}. + +cg_recv_mesg(#k_var{name=R}, Rm, Tl, Le, St0) -> + {Dst,St1} = new_ssa_var(R, St0), + {Mis,St2} = match_cg(Rm, none, St1), + RecvLbl = St1#cg.recv, + {TestIs,St} = make_cond_branch(succeeded, [Dst], Tl, St2), + Is = [#b_br{anno=line_anno(Le),bool=#b_literal{val=true}, + succ=RecvLbl,fail=RecvLbl}, + {label,RecvLbl}, + #b_set{op=peek_message,dst=Dst}|TestIs], + {Is++Mis,St}. + +%% cg_recv_wait(Te, Tes, St) -> {[Ainstr],St}. + +cg_recv_wait(#k_int{val=0}, Es, St0) -> + {Tis,St} = cg(Es, St0), + {[#b_set{op=timeout}|Tis],St}; +cg_recv_wait(Te, Es, St0) -> + {Tis,St1} = cg(Es, St0), + Args = [ssa_arg(Te, St1)], + {WaitDst,St2} = new_ssa_var('@ssa_wait', St1), + {WaitIs,St} = make_cond_branch(succeeded, [WaitDst], St1#cg.recv, St2), + %% Infinite timeout will be optimized later. + Is = [#b_set{op=wait_timeout,dst=WaitDst,args=Args}] ++ WaitIs ++ + [#b_set{op=timeout}] ++ Tis, + {Is,St}. + +%% try_cg(TryBlock, [BodyVar], TryBody, [ExcpVar], TryHandler, [Ret], St) -> +%% {[Ainstr],St}. + +try_cg(Ta, Vs, Tb, Evs, Th, Rs, St0) -> + {B,St1} = new_label(St0), %Body label + {H,St2} = new_label(St1), %Handler label + {E,St3} = new_label(St2), %End label + {Next,St4} = new_label(St3), + {TryTag,St5} = new_ssa_var('@ssa_catch_tag', St4), + {SsaVs,St6} = new_ssa_vars(Vs, St5), + {SsaEvs,St7} = new_ssa_vars(Evs, St6), + {Ais,St8} = cg(Ta, St7#cg{break=B,catch_label=H}), + St9 = St8#cg{break=E,catch_label=St7#cg.catch_label}, + {Bis,St10} = cg(Tb, St9), + {His,St11} = cg(Th, St10), + {BreakVars,St12} = new_ssa_vars(Rs, St11), + {CatchedAgg,St} = new_ssa_var('@ssa_agg', St12), + ExtractVs = extract_vars(SsaEvs, CatchedAgg, 0), + KillTryTag = #b_set{op=kill_try_tag,args=[TryTag]}, + Args = [#b_literal{val='try'},TryTag], + Handler = [{label,H}, + #b_set{op=landingpad,dst=CatchedAgg,args=Args}] ++ + ExtractVs ++ [KillTryTag], + {[#b_set{op=new_try_tag,dst=TryTag,args=[#b_literal{val='try'}]}, + #b_br{bool=TryTag,succ=Next,fail=H}, + {label,Next}] ++ Ais ++ + [{label,B},#cg_phi{vars=SsaVs},KillTryTag] ++ Bis ++ + Handler ++ His ++ + [{label,E},#cg_phi{vars=BreakVars}], + St#cg{break=St0#cg.break}}. + +try_enter_cg(Ta, Vs, Tb, Evs, Th, St0) -> + {B,St1} = new_label(St0), %Body label + {H,St2} = new_label(St1), %Handler label + {Next,St3} = new_label(St2), + {TryTag,St4} = new_ssa_var('@ssa_catch_tag', St3), + {SsaVs,St5} = new_ssa_vars(Vs, St4), + {SsaEvs,St6} = new_ssa_vars(Evs, St5), + {Ais,St7} = cg(Ta, St6#cg{break=B,catch_label=H}), + St8 = St7#cg{catch_label=St6#cg.catch_label}, + {Bis,St9} = cg(Tb, St8), + {His,St10} = cg(Th, St9), + {CatchedAgg,St} = new_ssa_var('@ssa_agg', St10), + ExtractVs = extract_vars(SsaEvs, CatchedAgg, 0), + KillTryTag = #b_set{op=kill_try_tag,args=[TryTag]}, + Args = [#b_literal{val='try'},TryTag], + Handler = [{label,H}, + #b_set{op=landingpad,dst=CatchedAgg,args=Args}] ++ + ExtractVs ++ [KillTryTag], + {[#b_set{op=new_try_tag,dst=TryTag,args=[#b_literal{val='try'}]}, + #b_br{bool=TryTag,succ=Next,fail=H}, + {label,Next}] ++ Ais ++ + [{label,B},#cg_phi{vars=SsaVs},KillTryTag] ++ Bis ++ + Handler ++ His, + St#cg{break=St0#cg.break}}. + +extract_vars([V|Vs], Agg, N) -> + I = #b_set{op=extract,dst=V,args=[Agg,#b_literal{val=N}]}, + [I|extract_vars(Vs, Agg, N+1)]; +extract_vars([], _, _) -> []. + +%% do_catch_cg(CatchBlock, Ret, St) -> {[Ainstr],St}. + +do_catch_cg(Block, #k_var{name=R}, St0) -> + {B,St1} = new_label(St0), + {Next,St2} = new_label(St1), + {H,St3} = new_label(St2), + {CatchReg,St4} = new_ssa_var('@ssa_catch_tag', St3), + {Dst,St5} = new_ssa_var(R, St4), + {Succ,St6} = new_label(St5), + {Cis,St7} = cg(Block, St6#cg{break=Succ,catch_label=H}), + {CatchedVal,St8} = new_ssa_var('@catched_val', St7), + {SuccVal,St9} = new_ssa_var('@success_val', St8), + {CatchedAgg,St10} = new_ssa_var('@ssa_agg', St9), + {CatchEndVal,St} = new_ssa_var('@catch_end_val', St10), + Args = [#b_literal{val='catch'},CatchReg], + {[#b_set{op=new_try_tag,dst=CatchReg,args=[#b_literal{val='catch'}]}, + #b_br{bool=CatchReg,succ=Next,fail=H}, + {label,Next}] ++ Cis ++ + [{label,H}, + #b_set{op=landingpad,dst=CatchedAgg,args=Args}, + #b_set{op=extract,dst=CatchedVal, + args=[CatchedAgg,#b_literal{val=0}]}, + #cg_break{args=[CatchedVal],phi=B}, + {label,Succ}, + #cg_phi{vars=[SuccVal]}, + #cg_break{args=[SuccVal],phi=B}, + {label,B},#cg_phi{vars=[CatchEndVal]}, + #b_set{op=catch_end,dst=Dst,args=[CatchReg,CatchEndVal]}], + St#cg{break=St1#cg.break,catch_label=St1#cg.catch_label}}. + +%% put_cg([Var], Constr, Le, Vdb, Bef, St) -> {[Ainstr],St}. +%% Generate code for constructing terms. + +put_cg([#k_var{name=R}], #k_cons{hd=Hd,tl=Tl}, _Le, St0) -> + Args = ssa_args([Hd,Tl], St0), + {Dst,St} = new_ssa_var(R, St0), + PutList = #b_set{op=put_list,dst=Dst,args=Args}, + {[PutList],St}; +put_cg([#k_var{name=R}], #k_tuple{es=Es}, _Le, St0) -> + {Ret,St} = new_ssa_var(R, St0), + Args = ssa_args(Es, St), + PutTuple = #b_set{op=put_tuple,dst=Ret,args=Args}, + {[PutTuple],St}; +put_cg([#k_var{name=R}], #k_binary{segs=Segs}, Le, St0) -> + Fail = bif_fail_label(St0), + {Dst,St1} = new_ssa_var(R, St0), + cg_binary(Dst, Segs, Fail, Le, St1); +put_cg([#k_var{name=R}], #k_map{op=Op,var=Map, + es=[#k_map_pair{key=#k_var{}=K,val=V}]}, + Le, St0) -> + %% Map: single variable key. + SrcMap = ssa_arg(Map, St0), + LineAnno = line_anno(Le), + List = [ssa_arg(K, St0),ssa_arg(V, St0)], + {Dst,St1} = new_ssa_var(R, St0), + {Is,St} = put_cg_map(LineAnno, Op, SrcMap, Dst, List, St1), + {Is,St}; +put_cg([#k_var{name=R}], #k_map{op=Op,var=Map,es=Es}, Le, St0) -> + %% Map: one or more literal keys. + [] = [Var || #k_map_pair{key=#k_var{}=Var} <- Es], %Assertion + SrcMap = ssa_arg(Map, St0), + LineAnno = line_anno(Le), + List = flatmap(fun(#k_map_pair{key=K,val=V}) -> + [ssa_arg(K, St0),ssa_arg(V, St0)] + end, Es), + {Dst,St1} = new_ssa_var(R, St0), + {Is,St} = put_cg_map(LineAnno, Op, SrcMap, Dst, List, St1), + {Is,St}; +put_cg([#k_var{name=R}], Con0, _Le, St0) -> + %% Create an alias for a variable or literal. + Con = ssa_arg(Con0, St0), + St = set_ssa_var(R, Con, St0), + {[],St}. + +put_cg_map(LineAnno, Op, SrcMap, Dst, List, St0) -> + Fail = bif_fail_label(St0), + Args = [#b_literal{val=Op},SrcMap|List], + PutMap = #b_set{anno=LineAnno,op=put_map,dst=Dst,args=Args}, + if + Op =:= assoc -> + {[PutMap],St0}; + true -> + {Is,St} = make_cond_branch(succeeded, [Dst], Fail, St0), + {[PutMap|Is],St} + end. + +%%% +%%% Code generation for constructing binaries. +%%% + +cg_binary(Dst, Segs0, Fail, Le, St0) -> + {PutCode0,SzCalc0,St1} = cg_bin_put(Segs0, Fail, St0), + LineAnno = line_anno(Le), + Anno = Le#k.a, + case PutCode0 of + [#b_set{op=bs_put,dst=Bool,args=[_,_,Src,#b_literal{val=all}|_]}, + #b_br{bool=Bool}, + {label,_}|_] -> + #k_bin_seg{unit=Unit0,next=Segs} = Segs0, + Unit = #b_literal{val=Unit0}, + {PutCode,SzCalc1,St2} = cg_bin_put(Segs, Fail, St1), + {_,SzVar,SzCode0,St3} = cg_size_calc(1, SzCalc1, Fail, St2), + SzCode = cg_bin_anno(SzCode0, LineAnno), + Args = case member(single_use, Anno) of + true -> + [#b_literal{val=private_append},Src,SzVar,Unit]; + false -> + [#b_literal{val=append},Src,SzVar,Unit] + end, + BsInit = #b_set{anno=LineAnno,op=bs_init,dst=Dst,args=Args}, + {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St3), + {SzCode ++ [BsInit] ++ TestIs ++ PutCode,St}; + [#b_set{op=bs_put}|_] -> + {Unit,SzVar,SzCode0,St2} = cg_size_calc(8, SzCalc0, Fail, St1), + SzCode = cg_bin_anno(SzCode0, LineAnno), + Args = [#b_literal{val=new},SzVar,Unit], + BsInit = #b_set{anno=LineAnno,op=bs_init,dst=Dst,args=Args}, + {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St2), + {SzCode ++ [BsInit] ++ TestIs ++ PutCode0,St} + end. + +cg_bin_anno([Set|Sets], Anno) -> + [Set#b_set{anno=Anno}|Sets]; +cg_bin_anno([], _) -> []. + +%% cg_size_calc(PreferredUnit, SzCalc, Fail, St0) -> +%% {ActualUnit,SizeVariable,SizeCode,St}. +%% Generate size calculation code. + +cg_size_calc(Unit, error, _Fail, St) -> + {#b_literal{val=Unit},#b_literal{val=badarg},[],St}; +cg_size_calc(8, [{1,_}|_]=SzCalc, Fail, St) -> + cg_size_calc(1, SzCalc, Fail, St); +cg_size_calc(8, SzCalc, Fail, St0) -> + {Var,Pre,St} = cg_size_calc_1(SzCalc, Fail, St0), + {#b_literal{val=8},Var,Pre,St}; +cg_size_calc(1, SzCalc0, Fail, St0) -> + SzCalc = map(fun({8,#b_literal{val=Size}}) -> + {1,#b_literal{val=8*Size}}; + ({8,{{bif,byte_size},Src}}) -> + {1,{{bif,bit_size},Src}}; + ({8,{_,_}=UtfCalc}) -> + {1,{'*',#b_literal{val=8},UtfCalc}}; + ({_,_}=Pair) -> + Pair + end, SzCalc0), + {Var,Pre,St} = cg_size_calc_1(SzCalc, Fail, St0), + {#b_literal{val=1},Var,Pre,St}. + +cg_size_calc_1(SzCalc, Fail, St0) -> + cg_size_calc_2(SzCalc, #b_literal{val=0}, Fail, St0). + +cg_size_calc_2([{_,{'*',Unit,{_,_}=Bif}}|T], Sum0, Fail, St0) -> + {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0), + {BifDst,Pre1,St2} = cg_size_bif(Bif, Fail, St1), + {Sum,Pre2,St} = cg_size_add(Sum1, BifDst, Unit, Fail, St2), + {Sum,Pre0++Pre1++Pre2,St}; +cg_size_calc_2([{_,#b_literal{}=Sz}|T], Sum0, Fail, St0) -> + {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0), + {Sum,Pre,St} = cg_size_add(Sum1, Sz, #b_literal{val=1}, Fail, St1), + {Sum,Pre0++Pre,St}; +cg_size_calc_2([{_,#b_var{}=Sz}|T], Sum0, Fail, St0) -> + {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0), + {Sum,Pre,St} = cg_size_add(Sum1, Sz, #b_literal{val=1}, Fail, St1), + {Sum,Pre0++Pre,St}; +cg_size_calc_2([{_,{_,_}=Bif}|T], Sum0, Fail, St0) -> + {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0), + {BifDst,Pre1,St2} = cg_size_bif(Bif, Fail, St1), + {Sum,Pre2,St} = cg_size_add(Sum1, BifDst, #b_literal{val=1}, Fail, St2), + {Sum,Pre0++Pre1++Pre2,St}; +cg_size_calc_2([], Sum, _Fail, St) -> + {Sum,[],St}. + +cg_size_bif(#b_var{}=Var, _Fail, St) -> + {Var,[],St}; +cg_size_bif({Name,Src}, Fail, St0) -> + {Dst,St1} = new_ssa_var('@ssa_bif', St0), + Bif = #b_set{op=Name,dst=Dst,args=[Src]}, + {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St1), + {Dst,[Bif|TestIs],St}. + +cg_size_add(#b_literal{val=0}, Val, #b_literal{val=1}, _Fail, St) -> + {Val,[],St}; +cg_size_add(A, B, Unit, Fail, St0) -> + {Dst,St1} = new_ssa_var('@ssa_sum', St0), + {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St1), + BsAdd = #b_set{op=bs_add,dst=Dst,args=[A,B,Unit]}, + {Dst,[BsAdd|TestIs],St}. + +cg_bin_put(Seg, Fail, St) -> + cg_bin_put_1(Seg, Fail, [], [], St). + +cg_bin_put_1(#k_bin_seg{size=Size0,unit=U,type=T,flags=Fs,seg=Src0,next=Next}, + Fail, Acc, SzCalcAcc, St0) -> + [Src,Size] = ssa_args([Src0,Size0], St0), + NeedSize = bs_need_size(T), + TypeArg = #b_literal{val=T}, + Flags = #b_literal{val=Fs}, + Unit = #b_literal{val=U}, + Args = case NeedSize of + true -> [TypeArg,Flags,Src,Size,Unit]; + false -> [TypeArg,Flags,Src] + end, + {Is,St} = make_cond_branch(bs_put, Args, Fail, St0), + SzCalc = bin_size_calc(T, Src, Size, U), + cg_bin_put_1(Next, Fail, reverse(Is, Acc), [SzCalc|SzCalcAcc], St); +cg_bin_put_1(#k_bin_end{}, _, Acc, SzCalcAcc, St) -> + SzCalc = fold_size_calc(SzCalcAcc, 0, []), + {reverse(Acc),SzCalc,St}. + +bs_need_size(utf8) -> false; +bs_need_size(utf16) -> false; +bs_need_size(utf32) -> false; +bs_need_size(_) -> true. + +bin_size_calc(utf8, Src, _Size, _Unit) -> + {8,{bs_utf8_size,Src}}; +bin_size_calc(utf16, Src, _Size, _Unit) -> + {8,{bs_utf16_size,Src}}; +bin_size_calc(utf32, _Src, _Size, _Unit) -> + {8,#b_literal{val=4}}; +bin_size_calc(binary, Src, #b_literal{val=all}, Unit) -> + case Unit rem 8 of + 0 -> {8,{{bif,byte_size},Src}}; + _ -> {1,{{bif,bit_size},Src}} + end; +bin_size_calc(_Type, _Src, Size, Unit) -> + {Unit,Size}. + +fold_size_calc([{Unit,#b_literal{val=Size}}|T], Bits, Acc) -> + if + is_integer(Size) -> + fold_size_calc(T, Bits + Unit*Size, Acc); + true -> + error + end; +fold_size_calc([{U,#b_var{}}=H|T], Bits, Acc) when U =:= 1; U =:= 8 -> + fold_size_calc(T, Bits, [H|Acc]); +fold_size_calc([{U,#b_var{}=Var}|T], Bits, Acc) -> + fold_size_calc(T, Bits, [{1,{'*',#b_literal{val=U},Var}}|Acc]); +fold_size_calc([{_,_}=H|T], Bits, Acc) -> + fold_size_calc(T, Bits, [H|Acc]); +fold_size_calc([], Bits, Acc) -> + Bytes = Bits div 8, + RemBits = Bits rem 8, + Sizes = sort([{1,#b_literal{val=RemBits}},{8,#b_literal{val=Bytes}}|Acc]), + [Pair || {_,Sz}=Pair <- Sizes, Sz =/= #b_literal{val=0}]. + +%%% +%%% Utilities for creating the SSA types. +%%% + +ssa_args(As, St) -> + [ssa_arg(A, St) || A <- As]. + +ssa_arg(#k_var{name=V}, #cg{vars=Vars}) -> maps:get(V, Vars); +ssa_arg(#k_literal{val=V}, _) -> #b_literal{val=V}; +ssa_arg(#k_atom{val=V}, _) -> #b_literal{val=V}; +ssa_arg(#k_float{val=V}, _) -> #b_literal{val=V}; +ssa_arg(#k_int{val=V}, _) -> #b_literal{val=V}; +ssa_arg(#k_nil{}, _) -> #b_literal{val=[]}. + +new_ssa_vars(Vs, St) -> + mapfoldl(fun(#k_var{name=V}, S) -> + new_ssa_var(V, S) + end, St, Vs). + +new_ssa_var(VarBase, #cg{lcount=Uniq,vars=Vars}=St0) + when is_atom(VarBase); is_integer(VarBase) -> + case Vars of + #{VarBase:=_} -> + Var = #b_var{name={VarBase,Uniq}}, + St = St0#cg{lcount=Uniq+1,vars=Vars#{VarBase=>Var}}, + {Var,St}; + #{} -> + Var = #b_var{name=VarBase}, + St = St0#cg{vars=Vars#{VarBase=>Var}}, + {Var,St} + end. + +set_ssa_var(VarBase, Val, #cg{vars=Vars}=St) + when is_atom(VarBase); is_integer(VarBase) -> + St#cg{vars=Vars#{VarBase=>Val}}. + +%% new_label(St) -> {L,St}. + +new_label(#cg{lcount=Next}=St) -> + {Next,St#cg{lcount=Next+1}}. + +%% line_anno(Le) -> #{} | #{location:={File,Line}}. +%% Create a location annotation, containing information about the +%% current filename and line number. The annotation should be +%% included in any operation that could cause an exception. + +line_anno(#k{a=Anno}) -> + line_anno_1(Anno). + +line_anno_1([Line,{file,Name}]) when is_integer(Line) -> + line_anno_2(Name, Line); +line_anno_1([_|_]=A) -> + {Name,Line} = find_loc(A, no_file, 0), + line_anno_2(Name, Line); +line_anno_1([]) -> + #{}. + +line_anno_2(no_file, _) -> + #{}; +line_anno_2(_, 0) -> + %% Missing line number or line number 0. + #{}; +line_anno_2(Name, Line) -> + #{location=>{Name,Line}}. + +find_loc([Line|T], File, _) when is_integer(Line) -> + find_loc(T, File, Line); +find_loc([{file,File}|T], _, Line) -> + find_loc(T, File, Line); +find_loc([_|T], File, Line) -> + find_loc(T, File, Line); +find_loc([], File, Line) -> {File,Line}. + +flatmapfoldl(F, Accu0, [Hd|Tail]) -> + {R,Accu1} = F(Hd, Accu0), + {Rs,Accu2} = flatmapfoldl(F, Accu1, Tail), + {R++Rs,Accu2}; +flatmapfoldl(_, Accu, []) -> {[],Accu}. + +%%% +%%% Finalize the code. +%%% + +finalize(Asm0, St0) -> + Asm1 = fix_phis(Asm0), + {Asm,St} = fix_sets(Asm1, [], St0), + {build_map(Asm),St}. + +fix_phis(Is) -> + fix_phis_1(Is, none, #{}). + +fix_phis_1([{label,L},#cg_phi{vars=[]}=Phi|Is0], _Lbl, Map0) -> + case maps:is_key(L, Map0) of + false -> + %% No #cg_break{} references this label. Nothing else can + %% reference it, so it can be safely be removed. + {Is,Map} = drop_upto_label(Is0, Map0), + fix_phis_1(Is, none, Map); + true -> + %% There is a break referencing this label; probably caused + %% by a try/catch whose return value is ignored. + [{label,L}|fix_phis_1([Phi|Is0], L, Map0)] + end; +fix_phis_1([{label,L}=I|Is], _Lbl, Map) -> + [I|fix_phis_1(Is, L, Map)]; +fix_phis_1([#cg_unreachable{}|Is0], _Lbl, Map0) -> + {Is,Map} = drop_upto_label(Is0, Map0), + fix_phis_1(Is, none, Map); +fix_phis_1([#cg_break{args=Args,phi=Target}|Is], Lbl, Map) when is_integer(Lbl) -> + Pairs1 = case Map of + #{Target:=Pairs0} -> Pairs0; + #{} -> [] + end, + Pairs = [[{Arg,Lbl} || Arg <- Args]|Pairs1], + I = make_uncond_branch(Target), + [I|fix_phis_1(Is, none, Map#{Target=>Pairs})]; +fix_phis_1([#cg_phi{vars=Vars}|Is0], Lbl, Map0) -> + Pairs = maps:get(Lbl, Map0), + Map1 = maps:remove(Lbl, Map0), + case gen_phis(Vars, Pairs) of + [#b_set{op=phi,args=[]}] -> + {Is,Map} = drop_upto_label(Is0, Map1), + Ret = #b_ret{arg=#b_literal{val=unreachable}}, + [Ret|fix_phis_1(Is, none, Map)]; + Phis -> + Phis ++ fix_phis_1(Is0, Lbl, Map1) + end; +fix_phis_1([I|Is], Lbl, Map) -> + [I|fix_phis_1(Is, Lbl, Map)]; +fix_phis_1([], _, Map) -> + [] = maps:to_list(Map), %Assertion. + []. + +gen_phis([V|Vs], Preds0) -> + {Pairs,Preds} = collect_preds(Preds0, [], []), + [#b_set{op=phi,dst=V,args=Pairs}|gen_phis(Vs, Preds)]; +gen_phis([], _) -> []. + +collect_preds([[First|Rest]|T], ColAcc, RestAcc) -> + collect_preds(T, [First|ColAcc], [Rest|RestAcc]); +collect_preds([], ColAcc, RestAcc) -> + {keysort(2, ColAcc),RestAcc}. + +fix_sets([#b_set{dst=none}=Set|Is], Acc, St0) -> + {Dst,St} = new_ssa_var('@ssa_ignored', St0), + I = Set#b_set{dst=Dst}, + fix_sets(Is, [I|Acc], St); +fix_sets([I|Is], Acc, St) -> + fix_sets(Is, [I|Acc], St); +fix_sets([], Acc, St) -> + {reverse(Acc),St}. + +build_map(Is) -> + Blocks = build_graph_1(Is, [], []), + maps:from_list(Blocks). + +build_graph_1([{label,L}|Is], Lbls, []) -> + build_graph_1(Is, [L|Lbls], []); +build_graph_1([{label,L}|Is], Lbls, [_|_]=BlockAcc) -> + make_blocks(Lbls, BlockAcc) ++ build_graph_1(Is, [L], []); +build_graph_1([I|Is], Lbls, BlockAcc) -> + build_graph_1(Is, Lbls, [I|BlockAcc]); +build_graph_1([], Lbls, BlockAcc) -> + make_blocks(Lbls, BlockAcc). + +make_blocks(Lbls, [Last|Is0]) -> + Is = reverse(Is0), + Block = #b_blk{is=Is,last=Last}, + [{L,Block} || L <- Lbls]. + +drop_upto_label([{label,_}|_]=Is, Map) -> + {Is,Map}; +drop_upto_label([#cg_break{phi=Target}|Is], Map) -> + Pairs = case Map of + #{Target:=Pairs0} -> Pairs0; + #{} -> [] + end, + drop_upto_label(Is, Map#{Target=>Pairs}); +drop_upto_label([_|Is], Map) -> + drop_upto_label(Is, Map). + +k_get_anno(Thing) -> element(2, Thing). diff --git a/lib/compiler/src/beam_listing.erl b/lib/compiler/src/beam_listing.erl index 518b958794..8a0ce5b50a 100644 --- a/lib/compiler/src/beam_listing.erl +++ b/lib/compiler/src/beam_listing.erl @@ -23,6 +23,7 @@ -include("core_parse.hrl"). -include("v3_kernel.hrl"). +-include("beam_ssa.hrl"). -include("beam_disasm.hrl"). -import(lists, [foreach/2]). @@ -41,6 +42,12 @@ module(File, #k_mdef{}=Kern) -> %% This is a kernel module. io:put_chars(File, v3_kernel_pp:format(Kern)); %%io:put_chars(File, io_lib:format("~p~n", [Kern])); +module(File, #b_module{name=Mod,exports=Exp,attributes=Attr,body=Fs}) -> + io:format(File, "module ~p.\n", [Mod]), + io:format(File, "exports ~p.\n", [Exp]), + io:format(File, "attributes ~p.\n\n", [Attr]), + PP = [beam_ssa_pp:format_function(F) || F <- Fs], + io:put_chars(File, lists:join($\n, PP)); module(Stream, {Mod,Exp,Attr,Code,NumLabels}) -> %% This is output from v3_codegen. io:format(Stream, "{module, ~p}. %% version = ~w\n", diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl index 2b8dd40e29..74da6aa704 100644 --- a/lib/compiler/src/beam_peep.erl +++ b/lib/compiler/src/beam_peep.erl @@ -101,23 +101,13 @@ peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) -> I = {jump,F}, peep([I|Is], gb_sets:empty(), Acc0); [{atom,_}=Value,Lbl] when Op =:= select_val -> - %% Single value left. Convert to regular test and pop redundant tests. + %% Single value left. Convert to regular test. Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is], - case Acc0 of - [{test,is_atom,F,[R]}|Acc] -> - peep(Is1, SeenTests0, Acc); - _ -> - peep(Is1, SeenTests0, Acc0) - end; + peep(Is1, SeenTests0, Acc0); [{integer,_}=Value,Lbl] when Op =:= select_val -> - %% Single value left. Convert to regular test and pop redundant tests. + %% Single value left. Convert to regular test. Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is], - case Acc0 of - [{test,is_integer,F,[R]}|Acc] -> - peep(Is1, SeenTests0, Acc); - _ -> - peep(Is1, SeenTests0, Acc0) - end; + peep(Is1, SeenTests0, Acc0); [Arity,Lbl] when Op =:= select_tuple_arity -> %% Single value left. Convert to regular test Is1 = [{test,test_arity,F,[R,Arity]},{jump,Lbl}|Is], @@ -126,6 +116,21 @@ peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) -> I = {select,Op,R,F,Vls}, peep(Is, gb_sets:empty(), [I|Acc0]) end; +peep([{get_map_elements,Fail,Src,List}=I|Is], _SeenTests, Acc0) -> + SeenTests = gb_sets:empty(), + case simplify_get_map_elements(Fail, Src, List, Acc0) of + {ok,Acc} -> + peep(Is, SeenTests, Acc); + error -> + peep(Is, SeenTests, [I|Acc0]) + end; +peep([{test,has_map_fields,Fail,Ops}=I|Is], SeenTests, Acc0) -> + case simplify_has_map_fields(Fail, Ops, Acc0) of + {ok,Acc} -> + peep(Is, SeenTests, Acc); + error -> + peep(Is, SeenTests, [I|Acc0]) + end; peep([{test,Op,_,Ops}=I|Is], SeenTests0, Acc) -> case beam_utils:is_pure_test(I) of false -> @@ -176,3 +181,39 @@ prune_redundant_values([_Val,F|Vls], F) -> prune_redundant_values([Val,Lbl|Vls], F) -> [Val,Lbl|prune_redundant_values(Vls, F)]; prune_redundant_values([], _) -> []. + +simplify_get_map_elements(Fail, Src, {list,[Key,Dst]}, + [{get_map_elements,Fail,Src,{list,List1}}|Acc]) -> + case are_keys_literals([Key]) andalso are_keys_literals(List1) of + true -> + case member(Key, List1) of + true -> + %% The key is already in the other list. That is + %% very unusual, because there are optimizations to get + %% rid of duplicate keys. Therefore, don't try to + %% do anything smart here; just keep the + %% get_map_elements instructions separate. + error; + false -> + List = [Key,Dst|List1], + {ok,[{get_map_elements,Fail,Src,{list,List}}|Acc]} + end; + false -> + error + end; +simplify_get_map_elements(_, _, _, _) -> error. + +simplify_has_map_fields(Fail, [Src|Keys0], + [{test,has_map_fields,Fail,[Src|Keys1]}|Acc]) -> + case are_keys_literals(Keys0) andalso are_keys_literals(Keys1) of + true -> + Keys = Keys0 ++ Keys1, + {ok,[{test,has_map_fields,Fail,[Src|Keys]}|Acc]}; + false -> + error + end; +simplify_has_map_fields(_, _, _) -> error. + +are_keys_literals([{x,_}|_]) -> false; +are_keys_literals([{y,_}|_]) -> false; +are_keys_literals([_|_]) -> true. diff --git a/lib/compiler/src/beam_split.erl b/lib/compiler/src/beam_split.erl index 809e49b3d0..7b18946ae0 100644 --- a/lib/compiler/src/beam_split.erl +++ b/lib/compiler/src/beam_split.erl @@ -44,10 +44,6 @@ split_blocks([I|Is], Acc) -> split_blocks(Is, [I|Acc]); split_blocks([], Acc) -> reverse(Acc). -split_block([{set,[R],[_,_,_]=As,{bif,is_record,{f,Lbl}}}|Is], Bl, Acc) -> - %% is_record/3 must be translated by beam_clean; therefore, - %% it must be outside of any block. - split_block(Is, [], [{bif,is_record,{f,Lbl},As,R}|make_block(Bl, Acc)]); split_block([{set,[R],As,{bif,N,{f,Lbl}=Fail}}|Is], Bl, Acc) when Lbl =/= 0 -> split_block(Is, [], [{bif,N,Fail,As,R}|make_block(Bl, Acc)]); split_block([{set,[],[],{line,_}=Line}, diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl new file mode 100644 index 0000000000..a2766a0501 --- /dev/null +++ b/lib/compiler/src/beam_ssa.erl @@ -0,0 +1,616 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Purpose: Type definitions and utilities for the SSA format. + +-module(beam_ssa). +-export([add_anno/3,get_anno/2, + clobbers_xregs/1,def/2,def_used/2,dominators/1, + flatmapfold_instrs_rpo/4, + fold_po/3,fold_po/4,fold_rpo/3,fold_rpo/4, + fold_instrs_rpo/4, + linearize/1, + mapfold_instrs_rpo/4, + predecessors/1, + rename_vars/3, + rpo/1,rpo/2, + split_blocks/3, + successors/1,successors/2, + update_phi_labels/4,used/1]). + +-export_type([b_module/0,b_function/0,b_blk/0,b_set/0, + b_ret/0,b_br/0,b_switch/0,terminator/0, + b_var/0,b_literal/0,b_remote/0,b_local/0, + value/0,argument/0,label/0, + var_name/0,var_base/0,literal_value/0, + op/0,anno/0,block_map/0]). + +-include("beam_ssa.hrl"). + +-type b_module() :: #b_module{}. +-type b_function() :: #b_function{}. +-type b_blk() :: #b_blk{}. +-type b_set() :: #b_set{}. + +-type b_br() :: #b_br{}. +-type b_ret() :: #b_ret{}. +-type b_switch() :: #b_switch{}. +-type terminator() :: b_br() | b_ret() | b_switch(). + +-type b_var() :: #b_var{}. +-type b_literal() :: #b_literal{}. +-type b_remote() :: #b_remote{}. +-type b_local() :: #b_local{}. + +-type value() :: b_var() | b_literal(). +-type phi_value() :: {value(),label()}. +-type argument() :: value() | b_remote() | b_local() | phi_value(). +-type label() :: non_neg_integer(). + +-type var_name() :: var_base() | {var_base(),non_neg_integer()}. +-type var_base() :: atom() | non_neg_integer(). + +-type literal_value() :: atom() | integer() | float() | list() | + nil() | tuple() | map() | binary(). + +-type op() :: {'bif',atom()} | {'float',float_op()} | prim_op() | cg_prim_op(). +-type anno() :: #{atom() := any()}. + +-type block_map() :: #{label():=b_blk()}. + +%% Note: By default, dialyzer will collapse this type to atom(). +%% To avoid the collapsing, change the value of SET_LIMIT to 50 in the +%% file erl_types.erl in the hipe application. + +-type prim_op() :: 'bs_add' | 'bs_extract' | 'bs_init' | 'bs_init_writable' | + 'bs_match' | 'bs_put' | 'bs_start_match' | 'bs_test_tail' | + 'bs_utf16_size' | 'bs_utf8_size' | 'build_stacktrace' | + 'call' | 'catch_end' | 'context_to_binary' | + 'extract' | + 'get_hd' | 'get_map_element' | 'get_tl' | 'get_tuple_element' | + 'has_map_field' | + 'is_nonempty_list' | 'is_tagged_tuple' | + 'kill_try_tag' | + 'landingpad' | + 'make_fun' | 'new_try_tag' | + 'peek_message' | 'phi' | 'put_list' | 'put_map' | 'put_tuple' | + 'raw_raise' | 'recv_next' | 'remove_message' | 'resume' | + 'set_tuple_element' | 'succeeded' | + 'timeout' | + 'wait' | 'wait_timeout'. + +-type float_op() :: 'checkerror' | 'clearerror' | 'convert' | 'get' | 'put' | + '+' | '-' | '*' | '/'. + +%% Primops only used internally during code generation. +-type cg_prim_op() :: 'bs_get' | 'bs_match_string' | 'bs_restore' | 'bs_skip' | +'copy' | 'put_tuple_arity' | 'put_tuple_element'. + +-import(lists, [foldl/3,mapfoldl/3,reverse/1]). + +-spec add_anno(Key, Value, Construct) -> Construct when + Key :: atom(), + Value :: any(), + Construct :: b_function() | b_blk() | b_set() | terminator(). + +add_anno(Key, Val, #b_function{anno=Anno}=Bl) -> + Bl#b_function{anno=Anno#{Key=>Val}}; +add_anno(Key, Val, #b_blk{anno=Anno}=Bl) -> + Bl#b_blk{anno=Anno#{Key=>Val}}; +add_anno(Key, Val, #b_set{anno=Anno}=Bl) -> + Bl#b_set{anno=Anno#{Key=>Val}}; +add_anno(Key, Val, #b_br{anno=Anno}=Bl) -> + Bl#b_br{anno=Anno#{Key=>Val}}; +add_anno(Key, Val, #b_ret{anno=Anno}=Bl) -> + Bl#b_ret{anno=Anno#{Key=>Val}}; +add_anno(Key, Val, #b_switch{anno=Anno}=Bl) -> + Bl#b_switch{anno=Anno#{Key=>Val}}. + +-spec get_anno(atom(), b_blk()|b_set()|terminator()) -> any(). + +get_anno(Key, Construct) -> + maps:get(Key, get_anno(Construct)). + +get_anno(#b_blk{anno=Anno}) -> Anno; +get_anno(#b_set{anno=Anno}) -> Anno; +get_anno(#b_br{anno=Anno}) -> Anno; +get_anno(#b_ret{anno=Anno}) -> Anno; +get_anno(#b_switch{anno=Anno}) -> Anno. + +%% clobbers_xregs(#b_set{}) -> true|false. +%% Test whether the instruction invalidates all X registers. + +-spec clobbers_xregs(b_set()) -> boolean(). + +clobbers_xregs(#b_set{op=Op}) -> + case Op of + bs_init_writable -> true; + build_stacktrace -> true; + call -> true; + landingpad -> true; + make_fun -> true; + peek_message -> true; + raw_raise -> true; + _ -> false + end. + +-spec predecessors(Blocks) -> #{BlockNumber:=[Predecessor]} when + Blocks :: block_map(), + BlockNumber :: label(), + Predecessor :: label(). + +predecessors(Blocks) -> + P0 = [{S,L} || {L,Blk} <- maps:to_list(Blocks), + S <- successors(Blk)], + P1 = sofs:relation(P0), + P2 = sofs:rel2fam(P1), + P3 = sofs:to_external(P2), + P = [{0,[]}|P3], + maps:from_list(P). + +-spec successors(b_blk()) -> [label()]. + +successors(#b_blk{last=Terminator}) -> + case Terminator of + #b_br{bool=#b_literal{val=true},succ=Succ} -> + [Succ]; + #b_br{bool=#b_literal{val=false},fail=Fail} -> + [Fail]; + #b_br{succ=Succ,fail=Fail} -> + [Fail,Succ]; + #b_switch{fail=Fail,list=List} -> + [Fail|[L || {_,L} <- List]]; + #b_ret{} -> + [] + end. + +-spec successors(label(), block_map()) -> [label()]. + +successors(L, Blocks) -> + successors(maps:get(L, Blocks)). + +-spec def(Ls, Blocks) -> Def when + Ls :: [label()], + Blocks :: block_map(), + Def :: ordsets:ordset(var_name()). + +def(Ls, Blocks) -> + Top = rpo(Ls, Blocks), + Blks = [maps:get(L, Blocks) || L <- Top], + def_1(Blks, []). + +-spec def_used(Ls, Blocks) -> {Def,Used} when + Ls :: [label()], + Blocks :: block_map(), + Def :: ordsets:ordset(var_name()), + Used :: ordsets:ordset(var_name()). + +def_used(Ls, Blocks) -> + Top = rpo(Ls, Blocks), + Blks = [maps:get(L, Blocks) || L <- Top], + Preds = gb_sets:from_list(Top), + def_used_1(Blks, Preds, [], gb_sets:empty()). + +-spec dominators(Blocks) -> Result when + Blocks :: block_map(), + Result :: #{label():=ordsets:ordset(label())}. + +dominators(Blocks) -> + Preds = predecessors(Blocks), + Top0 = rpo(Blocks), + Top = [{L,maps:get(L, Preds)} || L <- Top0], + + %% The flow graph for an Erlang function is reducible, and + %% therefore one traversal in reverse postorder is sufficient. + iter_dominators(Top, #{}). + +-spec fold_instrs_rpo(Fun, From, Acc0, Blocks) -> any() when + Fun :: fun((b_blk()|terminator(), any()) -> any()), + From :: [label()], + Acc0 :: any(), + Blocks :: block_map(). + +fold_instrs_rpo(Fun, From, Acc0, Blocks) -> + Top = rpo(From, Blocks), + fold_instrs_rpo_1(Top, Fun, Blocks, Acc0). + +-spec mapfold_instrs_rpo(Fun, From, Acc0, Blocks0) -> {Blocks,Acc} when + Fun :: fun((b_blk()|terminator(), any()) -> any()), + From :: [label()], + Acc0 :: any(), + Acc :: any(), + Blocks0 :: block_map(), + Blocks :: block_map(). + +mapfold_instrs_rpo(Fun, From, Acc0, Blocks) -> + Top = rpo(From, Blocks), + mapfold_instrs_rpo_1(Top, Fun, Blocks, Acc0). + +-spec flatmapfold_instrs_rpo(Fun, From, Acc0, Blocks0) -> {Blocks,Acc} when + Fun :: fun((b_blk()|terminator(), any()) -> any()), + From :: [label()], + Acc0 :: any(), + Acc :: any(), + Blocks0 :: block_map(), + Blocks :: block_map(). + +flatmapfold_instrs_rpo(Fun, From, Acc0, Blocks) -> + Top = rpo(From, Blocks), + flatmapfold_instrs_rpo_1(Top, Fun, Blocks, Acc0). + +-type fold_fun() :: fun((label(), b_blk(), any()) -> any()). + +%% fold_rpo(Fun, [Label], Acc0, Blocks) -> Acc. +%% Fold over all blocks a reverse postorder traversal of the block +%% graph; that is, first visit a block, then visit its successors. + +-spec fold_rpo(Fun, Acc0, Blocks) -> any() when + Fun :: fold_fun(), + Acc0 :: any(), + Blocks :: #{label():=b_blk()}. + +fold_rpo(Fun, Acc0, Blocks) -> + fold_rpo(Fun, [0], Acc0, Blocks). + +%% fold_rpo(Fun, [Label], Acc0, Blocks) -> Acc. Fold over all blocks +%% reachable from a given set of labels in a reverse postorder +%% traversal of the block graph; that is, first visit a block, then +%% visit its successors. + +-spec fold_rpo(Fun, Labels, Acc0, Blocks) -> any() when + Fun :: fold_fun(), + Labels :: [label()], + Acc0 :: any(), + Blocks :: #{label():=b_blk()}. + +fold_rpo(Fun, From, Acc0, Blocks) -> + Top = rpo(From, Blocks), + fold_rpo_1(Top, Fun, Blocks, Acc0). + +%% fold_po(Fun, Acc0, Blocks) -> Acc. +%% Fold over all blocks in a postorder traversal of the block graph; +%% that is, first visit all successors of block, then the block +%% itself. + +-spec fold_po(Fun, Acc0, Blocks) -> any() when + Fun :: fold_fun(), + Acc0 :: any(), + Blocks :: #{label():=b_blk()}. + +%% fold_po(Fun, From, Acc0, Blocks) -> Acc. +%% Fold over the blocks reachable from the block numbers given +%% by From in a postorder traversal of the block graph. + +fold_po(Fun, Acc0, Blocks) -> + fold_po(Fun, [0], Acc0, Blocks). + +-spec fold_po(Fun, Labels, Acc0, Blocks) -> any() when + Fun :: fold_fun(), + Labels :: [label()], + Acc0 :: any(), + Blocks :: block_map(). + +fold_po(Fun, From, Acc0, Blocks) -> + Top = reverse(rpo(From, Blocks)), + fold_rpo_1(Top, Fun, Blocks, Acc0). + +%% linearize(Blocks) -> [{BlockLabel,#b_blk{}}]. +%% Linearize the intermediate representation of the code. + +-spec linearize(Blocks) -> Linear when + Blocks :: block_map(), + Linear :: [{label(),b_blk()}]. + +linearize(Blocks) -> + Seen = gb_sets:empty(), + {Linear,_} = linearize_1([0], Blocks, Seen, []), + Linear. + +-spec rpo(Blocks) -> [Label] when + Blocks :: block_map(), + Label :: label(). + +rpo(Blocks) -> + rpo([0], Blocks). + +-spec rpo(From, Blocks) -> Labels when + From :: [label()], + Blocks :: block_map(), + Labels :: [label()]. + +rpo(From, Blocks) -> + Seen = gb_sets:empty(), + {Ls,_} = rpo_1(From, Blocks, Seen, []), + Ls. + +-spec rename_vars(Rename, [label()], block_map()) -> block_map() when + Rename :: [{var_name(),value()}] | #{var_name():=value()}. + +rename_vars(Rename, From, Blocks) when is_list(Rename) -> + rename_vars(maps:from_list(Rename), From, Blocks); +rename_vars(Rename, From, Blocks) when is_map(Rename)-> + Top = rpo(From, Blocks), + Preds = gb_sets:from_list(Top), + F = fun(#b_set{op=phi,args=Args0}=Set) -> + Args = rename_phi_vars(Args0, Preds, Rename), + Set#b_set{args=Args}; + (#b_set{args=Args0}=Set) -> + Args = [rename_var(A, Rename) || A <- Args0], + Set#b_set{args=Args}; + (#b_switch{arg=Bool}=Sw) -> + Sw#b_switch{arg=rename_var(Bool, Rename)}; + (#b_br{bool=Bool}=Br) -> + Br#b_br{bool=rename_var(Bool, Rename)}; + (#b_ret{arg=Arg}=Ret) -> + Ret#b_ret{arg=rename_var(Arg, Rename)} + end, + map_instrs_1(Top, F, Blocks). + +%% split_blocks(Predicate, Blocks0, Count0) -> {Blocks,Count}. +%% Call Predicate(Instruction) for each instruction in all +%% blocks. If Predicate/1 returns true, split the block +%% before this instruction. + +-spec split_blocks(Pred, Blocks0, Count0) -> {Blocks,Count} when + Pred :: fun((b_set()) -> boolean()), + Blocks :: block_map(), + Count0 :: beam_ssa:label(), + Blocks0 :: block_map(), + Blocks :: block_map(), + Count :: beam_ssa:label(). + +split_blocks(P, Blocks, Count) -> + Ls = beam_ssa:rpo(Blocks), + split_blocks_1(Ls, P, Blocks, Count). + +%% update_phi_labels([BlockLabel], Old, New, Blocks0) -> Blocks. +%% In the given blocks, replace label Old in with New in all +%% phi nodes. This is useful after merging or splitting +%% blocks. + +-spec update_phi_labels(From, Old, New, Blocks0) -> Blocks when + From :: [label()], + Old :: label(), + New :: label(), + Blocks0 :: block_map(), + Blocks :: block_map(). + +update_phi_labels([L|Ls], Old, New, Blocks0) -> + case Blocks0 of + #{L:=#b_blk{is=[#b_set{op=phi}|_]=Is0}=Blk0} -> + Is = update_phi_labels_is(Is0, Old, New), + Blk = Blk0#b_blk{is=Is}, + Blocks = Blocks0#{L:=Blk}, + update_phi_labels(Ls, Old, New, Blocks); + #{L:=#b_blk{}} -> + %% No phi nodes in this block. + update_phi_labels(Ls, Old, New, Blocks0) + end; +update_phi_labels([], _, _, Blocks) -> Blocks. + +-spec used(b_blk() | b_set() | terminator()) -> [var_name()]. + +used(#b_blk{is=Is,last=Last}) -> + used_1([Last|Is], ordsets:new()); +used(#b_br{bool=#b_var{name=V}}) -> + [V]; +used(#b_ret{arg=#b_var{name=V}}) -> + [V]; +used(#b_set{op=phi,args=Args}) -> + ordsets:from_list([V || {#b_var{name=V},_} <- Args]); +used(#b_set{args=Args}) -> + ordsets:from_list(used_args(Args)); +used(#b_switch{arg=#b_var{name=V}}) -> + [V]; +used(_) -> []. + +%%% +%%% Internal functions. +%%% + +def_used_1([#b_blk{is=Is,last=Last}|Bs], Preds, Def0, Used0) -> + {Def,Used1} = def_used_is(Is, Preds, Def0, Used0), + Used = gb_sets:union(gb_sets:from_list(used(Last)), Used1), + def_used_1(Bs, Preds, Def, Used); +def_used_1([], _Preds, Def, Used) -> + {ordsets:from_list(Def),gb_sets:to_list(Used)}. + +def_used_is([#b_set{op=phi,dst=#b_var{name=Dst},args=Args}|Is], + Preds, Def0, Used0) -> + Def = [Dst|Def0], + %% We must be careful to only include variables that will + %% be used when arriving from one of the predecessor blocks + %% in Preds. + Used1 = [V || {#b_var{name=V},L} <- Args, + gb_sets:is_member(L, Preds)], + Used = gb_sets:union(gb_sets:from_list(Used1), Used0), + def_used_is(Is, Preds, Def, Used); +def_used_is([#b_set{dst=#b_var{name=Dst}}=I|Is], Preds, Def0, Used0) -> + Def = [Dst|Def0], + Used = gb_sets:union(gb_sets:from_list(used(I)), Used0), + def_used_is(Is, Preds, Def, Used); +def_used_is([], _Preds, Def, Used) -> + {Def,Used}. + +def_1([#b_blk{is=Is}|Bs], Def0) -> + Def = def_is(Is, Def0), + def_1(Bs, Def); +def_1([], Def) -> + ordsets:from_list(Def). + +def_is([#b_set{dst=#b_var{name=Dst}}|Is], Def) -> + def_is(Is, [Dst|Def]); +def_is([], Def) -> Def. + +iter_dominators([{0,[]}|Ls], _Doms) -> + Dom = [0], + iter_dominators(Ls, #{0=>Dom}); +iter_dominators([{L,Preds}|Ls], Doms) -> + DomPreds = [maps:get(P, Doms) || P <- Preds, maps:is_key(P, Doms)], + Dom = ordsets:add_element(L, ordsets:intersection(DomPreds)), + iter_dominators(Ls, Doms#{L=>Dom}); +iter_dominators([], Doms) -> Doms. + +fold_rpo_1([L|Ls], Fun, Blocks, Acc0) -> + Block = maps:get(L, Blocks), + Acc = Fun(L, Block, Acc0), + fold_rpo_1(Ls, Fun, Blocks, Acc); +fold_rpo_1([], _, _, Acc) -> Acc. + +fold_instrs_rpo_1([L|Ls], Fun, Blocks, Acc0) -> + #b_blk{is=Is,last=Last} = maps:get(L, Blocks), + Acc1 = foldl(Fun, Acc0, Is), + Acc = Fun(Last, Acc1), + fold_instrs_rpo_1(Ls, Fun, Blocks, Acc); +fold_instrs_rpo_1([], _, _, Acc) -> Acc. + +mapfold_instrs_rpo_1([L|Ls], Fun, Blocks0, Acc0) -> + #b_blk{is=Is0,last=Last0} = Block0 = maps:get(L, Blocks0), + {Is,Acc1} = mapfoldl(Fun, Acc0, Is0), + {Last,Acc} = Fun(Last0, Acc1), + Block = Block0#b_blk{is=Is,last=Last}, + Blocks = maps:put(L, Block, Blocks0), + mapfold_instrs_rpo_1(Ls, Fun, Blocks, Acc); +mapfold_instrs_rpo_1([], _, Blocks, Acc) -> + {Blocks,Acc}. + +flatmapfold_instrs_rpo_1([L|Ls], Fun, Blocks0, Acc0) -> + #b_blk{is=Is0,last=Last0} = Block0 = maps:get(L, Blocks0), + {Is,Acc1} = flatmapfoldl(Fun, Acc0, Is0), + {[Last],Acc} = Fun(Last0, Acc1), + Block = Block0#b_blk{is=Is,last=Last}, + Blocks = maps:put(L, Block, Blocks0), + flatmapfold_instrs_rpo_1(Ls, Fun, Blocks, Acc); +flatmapfold_instrs_rpo_1([], _, Blocks, Acc) -> + {Blocks,Acc}. + +linearize_1([L|Ls], Blocks, Seen0, Acc0) -> + case gb_sets:is_member(L, Seen0) of + true -> + linearize_1(Ls, Blocks, Seen0, Acc0); + false -> + Seen1 = gb_sets:insert(L, Seen0), + Block = maps:get(L, Blocks), + Successors = successors(Block), + {Acc,Seen} = linearize_1(Successors, Blocks, Seen1, Acc0), + linearize_1(Ls, Blocks, Seen, [{L,Block}|Acc]) + end; +linearize_1([], _, Seen, Acc) -> + {Acc,Seen}. + +rpo_1([L|Ls], Blocks, Seen0, Acc0) -> + case gb_sets:is_member(L, Seen0) of + true -> + rpo_1(Ls, Blocks, Seen0, Acc0); + false -> + Block = maps:get(L, Blocks), + Seen1 = gb_sets:insert(L, Seen0), + Successors = successors(Block), + {Acc,Seen} = rpo_1(Successors, Blocks, Seen1, Acc0), + rpo_1(Ls, Blocks, Seen, [L|Acc]) + end; +rpo_1([], _, Seen, Acc) -> + {Acc,Seen}. + +rename_var(#b_var{name=Old}=V, Rename) -> + case Rename of + #{Old:=New} -> New; + #{} -> V + end; +rename_var(#b_remote{mod=Mod0,name=Name0}=Remote, Rename) -> + Mod = rename_var(Mod0, Rename), + Name = rename_var(Name0, Rename), + Remote#b_remote{mod=Mod,name=Name}; +rename_var(Old, _) -> Old. + +rename_phi_vars([{Var,L}|As], Preds, Ren) -> + case gb_sets:is_member(L, Preds) of + true -> + [{rename_var(Var, Ren),L}|rename_phi_vars(As, Preds, Ren)]; + false -> + [{Var,L}|rename_phi_vars(As, Preds, Ren)] + end; +rename_phi_vars([], _, _) -> []. + +map_instrs_1([L|Ls], Fun, Blocks0) -> + #b_blk{is=Is0,last=Last0} = Blk0 = maps:get(L, Blocks0), + Is = [Fun(I) || I <- Is0], + Last = Fun(Last0), + Blk = Blk0#b_blk{is=Is,last=Last}, + Blocks = maps:put(L, Blk, Blocks0), + map_instrs_1(Ls, Fun, Blocks); +map_instrs_1([], _, Blocks) -> Blocks. + +flatmapfoldl(F, Accu0, [Hd|Tail]) -> + {R,Accu1} = F(Hd, Accu0), + {Rs,Accu2} = flatmapfoldl(F, Accu1, Tail), + {R++Rs,Accu2}; +flatmapfoldl(_, Accu, []) -> {[],Accu}. + +split_blocks_1([L|Ls], P, Blocks0, Count0) -> + #b_blk{is=Is0} = Blk = maps:get(L, Blocks0), + case split_blocks_is(Is0, P, []) of + {yes,Bef,Aft} -> + NewLbl = Count0, + Count = Count0 + 1, + Br = #b_br{bool=#b_literal{val=true},succ=NewLbl,fail=NewLbl}, + BefBlk = Blk#b_blk{is=Bef,last=Br}, + NewBlk = Blk#b_blk{is=Aft}, + Blocks1 = Blocks0#{L:=BefBlk,NewLbl=>NewBlk}, + Successors = beam_ssa:successors(NewBlk), + Blocks = beam_ssa:update_phi_labels(Successors, L, NewLbl, Blocks1), + split_blocks_1([NewLbl|Ls], P, Blocks, Count); + no -> + split_blocks_1(Ls, P, Blocks0, Count0) + end; +split_blocks_1([], _, Blocks, Count) -> + {Blocks,Count}. + +split_blocks_is([I|Is], P, []) -> + split_blocks_is(Is, P, [I]); +split_blocks_is([I|Is], P, Acc) -> + case P(I) of + true -> + {yes,reverse(Acc),[I|Is]}; + false -> + split_blocks_is(Is, P, [I|Acc]) + end; +split_blocks_is([], _, _) -> no. + +update_phi_labels_is([#b_set{op=phi,args=Args0}=I0|Is], Old, New) -> + Args = [{Arg,rename_label(Lbl, Old, New)} || {Arg,Lbl} <- Args0], + I = I0#b_set{args=Args}, + [I|update_phi_labels_is(Is, Old, New)]; +update_phi_labels_is(Is, _, _) -> Is. + +rename_label(Old, Old, New) -> New; +rename_label(Lbl, _Old, _New) -> Lbl. + +used_args([#b_var{name=V}|As]) -> + [V|used_args(As)]; +used_args([#b_remote{mod=Mod,name=Name}|As]) -> + used_args([Mod,Name|As]); +used_args([_|As]) -> + used_args(As); +used_args([]) -> []. + +used_1([H|T], Used0) -> + Used = ordsets:union(used(H), Used0), + used_1(T, Used); +used_1([], Used) -> Used. diff --git a/lib/compiler/src/beam_ssa.hrl b/lib/compiler/src/beam_ssa.hrl new file mode 100644 index 0000000000..fa76b08453 --- /dev/null +++ b/lib/compiler/src/beam_ssa.hrl @@ -0,0 +1,66 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-record(b_module, {anno=#{} :: beam_ssa:anno(), + name :: module(), + exports :: [{atom(),arity()}], + attributes :: list(), + body :: [beam_ssa:b_function()]}). +-record(b_function, {anno=#{} :: beam_ssa:anno(), + args :: [beam_ssa:b_var()], + bs :: #{beam_ssa:label():=beam_ssa:b_blk()}, + cnt :: beam_ssa:label()}). + +-record(b_blk, {anno=#{} :: beam_ssa:anno(), + is :: [beam_ssa:b_set()], + last :: beam_ssa:terminator()}). +-record(b_set, {anno=#{} :: beam_ssa:anno(), + dst=none :: 'none'|beam_ssa:b_var(), + op :: beam_ssa:op(), + args=[] :: [beam_ssa:argument()]}). + +%% Terminators. +-record(b_ret, {anno=#{} :: beam_ssa:anno(), + arg :: beam_ssa:value()}). + +-record(b_br, {anno=#{}, + bool :: beam_ssa:value(), + succ :: beam_ssa:label(), + fail :: beam_ssa:label()}). + +-record(b_switch, {anno=#{} :: beam_ssa:anno(), + arg :: beam_ssa:value(), + fail :: beam_ssa:label(), + list :: [{beam_ssa:b_literal(),beam_ssa:label()}]}). + +%% Values. +-record(b_var, {name :: beam_ssa:var_name()}). + +-record(b_literal, {val :: beam_ssa:literal_value()}). + +-record(b_remote, {mod :: beam_ssa:value(), + name :: beam_ssa:value(), + arity :: non_neg_integer()}). + +-record(b_local, {name :: beam_ssa:b_literal(), + arity :: non_neg_integer()}). + +%% If this block exists, it calls erlang:error(badarg). +-define(BADARG_BLOCK, 1). diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl new file mode 100644 index 0000000000..357352269c --- /dev/null +++ b/lib/compiler/src/beam_ssa_codegen.erl @@ -0,0 +1,1827 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Purpose: Generate BEAM assembly code from the SSA format. + +-module(beam_ssa_codegen). + +-export([module/2]). +-export([classify_heap_need/2]). %Called from beam_ssa_pre_codegen. + +-export_type([ssa_register/0]). + +-include("beam_ssa.hrl"). + +-import(lists, [foldl/3,keymember/3,keysort/2,last/1,map/2,mapfoldl/3, + reverse/1,reverse/2,sort/1,splitwith/2,takewhile/2]). + +-record(cg, {lcount=1 :: beam_label(), %Label counter + functable=#{} :: #{fa()=>beam_label()}, + labels=#{} :: #{ssa_label()=>0|beam_label()}, + used_labels=gb_sets:empty() :: gb_sets:set(ssa_label()), + regs=#{} :: #{beam_ssa:var_name()=>ssa_register()}, + ultimate_fail=1 :: beam_label(), + catches=gb_sets:empty() :: gb_sets:set(ssa_label()) + }). + +-spec module(beam_ssa:b_module(), [compile:option()]) -> + {'ok',beam_asm:module_code()}. + +module(#b_module{name=Mod,exports=Es,attributes=Attrs,body=Fs}, _Opts) -> + {Asm,St} = functions(Fs, {atom,Mod}), + {ok,{Mod,Es,Attrs,Asm,St#cg.lcount}}. + +-record(need, {h=0 :: non_neg_integer(), + f=0 :: non_neg_integer()}). + +-record(cg_blk, {anno=#{} :: anno(), + is=[] :: [instruction()], + last :: terminator()}). + +-record(cg_set, {anno=#{} :: anno(), + dst :: b_var(), + op :: beam_ssa:op(), + args :: [beam_ssa:argument() | xreg()]}). + +-record(cg_alloc, {anno=#{} :: anno(), + stack=none :: 'none' | pos_integer(), + words=#need{} :: #need{}, + live :: 'undefined' | pos_integer(), + def_yregs=[] :: [yreg()] + }). + +-record(cg_br, {bool :: beam_ssa:value(), + succ :: ssa_label(), + fail :: ssa_label() + }). +-record(cg_ret, {arg :: cg_value(), + dealloc=none :: 'none' | pos_integer() + }). +-record(cg_switch, {arg :: cg_value(), + fail :: ssa_label(), + list :: [sw_list_item()] + }). + +-type fa() :: {beam_asm:function_name(),arity()}. +-type ssa_label() :: beam_ssa:label(). +-type beam_label() :: beam_asm:label(). + +-type anno() :: beam_ssa:anno(). + +-type b_var() :: beam_ssa:b_var(). +-type b_literal() :: beam_ssa:b_literal(). + +-type cg_value() :: beam_ssa:value() | xreg(). + +-type cg_set() :: #cg_set{}. +-type cg_alloc() :: #cg_alloc{}. + +-type instruction() :: cg_set() | cg_alloc(). + +-type cg_br() :: #cg_br{}. +-type cg_ret() :: #cg_ret{}. +-type cg_switch() :: #cg_switch{}. +-type terminator() :: cg_br() | cg_ret() | cg_switch(). + +-type sw_list_item() :: {b_literal(),ssa_label()}. + +-type reg_num() :: beam_asm:reg_num(). +-type xreg() :: {'x',reg_num()}. +-type yreg() :: {'y',reg_num()}. + +-type ssa_register() :: xreg() | yreg() | {'fr',reg_num()} | {'z',reg_num()}. + +functions(Forms, AtomMod) -> + mapfoldl(fun (F, St) -> function(F, AtomMod, St) end, #cg{lcount=1}, Forms). + +function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) -> + #{func_info:={_,Name,Arity}} = Anno, + try + assert_badarg_block(Blocks), %Assertion. + Regs = maps:get(registers, Anno), + St1 = St0#cg{labels=#{},used_labels=gb_sets:empty(), + regs=Regs}, + {Fi,St2} = new_label(St1), %FuncInfo label + {Entry,St3} = local_func_label(Name, Arity, St2), + {Ult,St4} = new_label(St3), %Ultimate failure + Labels = (St4#cg.labels)#{0=>Entry,?BADARG_BLOCK=>0}, + St5 = St4#cg{labels=Labels,used_labels=gb_sets:singleton(Entry), + ultimate_fail=Ult}, + {Body,St} = cg_fun(Blocks, St5), + Asm = [{label,Fi},line(Anno), + {func_info,AtomMod,{atom,Name},Arity}] ++ Body ++ + [{label,Ult},if_end], + Func = {function,Name,Arity,Entry,Asm}, + {Func,St} + catch + Class:Error:Stack -> + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +assert_badarg_block(Blocks) -> + %% Assertion: ?BADARG_BLOCK must be the call erlang:error(badarg). + case Blocks of + #{?BADARG_BLOCK:=Blk} -> + #b_blk{is=[#b_set{op=call,dst=Ret, + args=[#b_remote{mod=#b_literal{val=erlang}, + name=#b_literal{val=error}}, + #b_literal{val=badarg}]}], + last=#b_ret{arg=Ret}} = Blk, + ok; + #{} -> + %% ?BADARG_BLOCK has been removed because it was never used. + ok + end. + +cg_fun(Blocks, St0) -> + Linear0 = linearize(Blocks), + St = collect_catch_labels(Linear0, St0), + Linear1 = need_heap(Linear0), + Linear2 = prefer_xregs(Linear1, St), + Linear3 = liveness(Linear2, St), + Linear4 = defined(Linear3, St), + Linear = opt_allocate(Linear4, St), + cg_linear(Linear, St). + +%% collect_catch_labels(Linear, St) -> St. +%% Collect all catch labels (labels for blocks that begin +%% with 'landingpad' instructions) for later use. + +collect_catch_labels(Linear, St) -> + Labels = collect_catch_labels_1(Linear), + St#cg{catches=gb_sets:from_list(Labels)}. + +collect_catch_labels_1([{L,#cg_blk{is=[#cg_set{op=landingpad}|_]}}|Bs]) -> + [L|collect_catch_labels_1(Bs)]; +collect_catch_labels_1([_|Bs]) -> + collect_catch_labels_1(Bs); +collect_catch_labels_1([]) -> []. + +%% need_heap([{BlockLabel,Block]) -> [{BlockLabel,Block}]. +%% Insert need_heap instructions in the instruction list. Try to be smart and +%% collect them together as much as possible. + +need_heap(Bs0) -> + Bs1 = need_heap_allocs(Bs0, #{}), + {Bs,#need{h=0,f=0}} = need_heap_blks(reverse(Bs1), #need{}, []), + Bs. + +need_heap_allocs([{L,#cg_blk{is=Is0,last=Terminator}=Blk0}|Bs], Counts0) -> + Next = next_block(Bs), + Successors = successors(Terminator), + Counts = foldl(fun(S, Cnts) -> + case Cnts of + #{S:=C} -> Cnts#{S:=C+1}; + #{} when S =:= Next -> Cnts#{S=>1}; + #{} -> Cnts#{S=>42} + end + end, Counts0, Successors), + case Counts of + #{L:=1} -> + [{L,Blk0}|need_heap_allocs(Bs, Counts)]; + #{L:=_} -> + %% This block has multiple predecessors. Force an allocation + %% in this block so that the predecessors don't need to do + %% an allocation on behalf of this block. + Is = case need_heap_never(Is0) of + true -> Is0; + false -> [#cg_alloc{}|Is0] + end, + Blk = Blk0#cg_blk{is=Is}, + [{L,Blk}|need_heap_allocs(Bs, Counts)]; + #{} -> + [{L,Blk0}|need_heap_allocs(Bs, Counts)] + end; +need_heap_allocs([], _) -> []. + +need_heap_never([#cg_alloc{}|_]) -> true; +need_heap_never([#cg_set{op=recv_next}|_]) -> true; +need_heap_never([#cg_set{op=wait}|_]) -> true; +need_heap_never(_) -> false. + +need_heap_blks([{L,#cg_blk{is=Is0}=Blk0}|Bs], H0, Acc) -> + {Is1,H1} = need_heap_is(reverse(Is0), H0, []), + {Ns,H} = need_heap_terminator(Bs, H1), + Is = Ns ++ Is1, + Blk = Blk0#cg_blk{is=Is}, + need_heap_blks(Bs, H, [{L,Blk}|Acc]); +need_heap_blks([], H, Acc) -> + {Acc,H}. + +need_heap_is([#cg_alloc{words=Words}=Alloc0|Is], N, Acc) -> + Alloc = Alloc0#cg_alloc{words=add_heap_words(N, Words)}, + need_heap_is(Is, #need{}, [Alloc|Acc]); +need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) -> + case classify_heap_need(Op, Args) of + {put,Words} -> + %% Pass through adding to needed heap. + need_heap_is(Is, add_heap_words(N, Words), [I|Acc]); + put_float -> + need_heap_is(Is, add_heap_float(N), [I|Acc]); + neutral -> + need_heap_is(Is, N, [I|Acc]); + gc -> + need_heap_is(Is, #need{}, [I]++need_heap_need(N)++Acc) + end; +need_heap_is([], N, Acc) -> + {Acc,N}. + +need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=Same,fail=Same}}}|_], N) -> + {[],N}; +need_heap_terminator([{_,#cg_blk{}}|_], N) -> + {need_heap_need(N),#need{}}; +need_heap_terminator([], H) -> + {need_heap_need(H),#need{}}. + +need_heap_need(#need{h=0,f=0}) -> []; +need_heap_need(#need{}=N) -> [#cg_alloc{words=N}]. + +add_heap_words(#need{h=H1,f=F1}, #need{h=H2,f=F2}) -> + #need{h=H1+H2,f=F1+F2}; +add_heap_words(#need{h=Heap}=N, Words) when is_integer(Words) -> + N#need{h=Heap+Words}. + +add_heap_float(#need{f=F}=N) -> + N#need{f=F+1}. + +%% classify_heap_need(Operation, Arguments) -> +%% gc | neutral | {put,Words} | put_float. +%% Classify the heap need for this instruction. The return +%% values have the following meaning. +%% +%% {put,Words} means that the instruction will use Words words to build +%% something on the heap. +%% +%% 'put_float' means that the instruction will build one floating point +%% number on the heap. +%% +%% 'gc' means that that the instruction can potentially do a GC or throw an +%% exception. That means that an allocation instruction for any building +%% must be placed after this instruction. +%% +%% 'neutral' means that the instruction does nothing to disturb the heap. + +-spec classify_heap_need(beam_ssa:op(), [beam_ssa:value()]) -> + 'gc' | 'neutral' | + {'put',non_neg_integer()} | 'put_float'. + +classify_heap_need(put_list, _) -> + {put,2}; +classify_heap_need(put_tuple_arity, [#b_literal{val=Words}]) -> + {put,Words+1}; +classify_heap_need({bif,Name}, Args) -> + case is_gc_bif(Name, Args) of + false -> neutral; + true -> gc + end; +classify_heap_need({float,Op}, _Args) -> + case Op of + get -> put_float; + _ -> neutral + end; +classify_heap_need(Name, _Args) -> + classify_heap_need(Name). + +%% classify_heap_need(Operation) -> gc | neutral. +%% Return either 'gc' or 'neutral'. +%% +%% 'gc' means that that the instruction can potentially do a GC or throw an +%% exception. That means that an allocation instruction for any building +%% must be placed after this instruction. +%% +%% 'neutral' means that the instruction does nothing to disturb the heap. +%% +%% Note: Only handle operations in this function that are not handled +%% by classify_heap_need/2. + +classify_heap_need(bs_add) -> gc; +classify_heap_need(bs_get) -> gc; +classify_heap_need(bs_init) -> gc; +classify_heap_need(bs_init_writable) -> gc; +classify_heap_need(bs_match_string) -> gc; +classify_heap_need(bs_put) -> neutral; +classify_heap_need(bs_restore) -> neutral; +classify_heap_need(bs_save) -> neutral; +classify_heap_need(bs_skip) -> gc; +classify_heap_need(bs_start_match) -> neutral; +classify_heap_need(bs_test_tail) -> neutral; +classify_heap_need(bs_utf16_size) -> neutral; +classify_heap_need(bs_utf8_size) -> neutral; +classify_heap_need(build_stacktrace) -> gc; +classify_heap_need(call) -> gc; +classify_heap_need(catch_end) -> gc; +classify_heap_need(context_to_binary) -> gc; +classify_heap_need(copy) -> neutral; +classify_heap_need(extract) -> gc; +classify_heap_need(get_hd) -> neutral; +classify_heap_need(get_map_element) -> neutral; +classify_heap_need(get_tl) -> neutral; +classify_heap_need(get_tuple_element) -> neutral; +classify_heap_need(has_map_field) -> neutral; +classify_heap_need(is_nonempty_list) -> neutral; +classify_heap_need(is_tagged_tuple) -> neutral; +classify_heap_need(kill_try_tag) -> gc; +classify_heap_need(landingpad) -> gc; +classify_heap_need(make_fun) -> gc; +classify_heap_need(new_try_tag) -> gc; +classify_heap_need(peek_message) -> gc; +classify_heap_need(put_map) -> gc; +classify_heap_need(put_tuple_elements) -> neutral; +classify_heap_need(raw_raise) -> gc; +classify_heap_need(recv_next) -> gc; +classify_heap_need(remove_message) -> neutral; +classify_heap_need(resume) -> gc; +classify_heap_need(set_tuple_element) -> gc; +classify_heap_need(succeeded) -> neutral; +classify_heap_need(timeout) -> gc; +classify_heap_need(wait) -> gc; +classify_heap_need(wait_timeout) -> gc. + +%%% +%%% Because beam_ssa_pre_codegen has inserted 'copy' instructions to copy +%%% variables that must be saved on the stack, a value can for some time +%%% be in both an X register and a Y register. +%%% +%%% Here we will keep track of variables that have the same value and +%%% rewrite instructions to use the variable that refers to the X +%%% register instead of the Y register. That could improve performance, +%%% since the BEAM interpreter have more optimized instructions +%%% operating on X registers than on Y registers. +%%% +%%% 'call' and 'make_fun' are handled somewhat specially. If a value +%%% already is in the correct X register, the X register will always +%%% be used instead of the Y register. However, if there are one or more +%%% values in the wrong X registers, the X registers variables will be +%%% used only if that does not cause more 'move' instructions to be +%%% be emitted than if the Y register variables were used. +%%% +%%% Here are some examples. The first example shows how a 'move' from +%%% an Y register is eliminated: +%%% +%%% move x0 y1 +%%% move y1 x0 %%Will be eliminated. +%%% +%%% call f/1 +%%% +%%% Here is an example when x0 and x1 must be swapped to load the argument +%%% registers. Here the 'call' instruction will use the Y registers to +%%% avoid introducing an extra 'move' insruction: +%%% +%%% move x0 y0 +%%% move x1 y1 +%%% +%%% move y0 x1 +%%% move y1 x0 +%%% +%%% call f/2 +%%% +%%% Using the X register to load the argument registers would need +%%% an extra 'move' instruction like this: +%%% +%%% move x0 y0 +%%% move x1 y1 +%%% +%%% move x1 x2 +%%% move x0 x1 +%%% move x2 x0 +%%% +%%% call f/2 +%%% + +prefer_xregs(Linear, St) -> + prefer_xregs(Linear, St, #{0=>#{}}). + +prefer_xregs([{L,#cg_blk{is=Is0,last=Last0}=Blk0}|Bs], St, Map0) -> + Copies0 = maps:get(L, Map0), + {Is,Copies} = prefer_xregs_is(Is0, St, Copies0, []), + Last = prefer_xregs_terminator(Last0, Copies, St), + Blk = Blk0#cg_blk{is=Is,last=Last}, + Successors = successors(Last), + Map = prefer_xregs_successors(Successors, Copies, Map0), + [{L,Blk}|prefer_xregs(Bs, St, Map)]; +prefer_xregs([], _St, _Map) -> []. + +prefer_xregs_successors([L|Ls], Copies0, Map0) -> + case Map0 of + #{L:=Copies1} -> + Copies = merge_copies(Copies0, Copies1), + Map = Map0#{L:=Copies}, + prefer_xregs_successors(Ls, Copies0, Map); + #{} -> + Map = Map0#{L=>Copies0}, + prefer_xregs_successors(Ls, Copies0, Map) + end; +prefer_xregs_successors([], _, Map) -> Map. + +prefer_xregs_is([#cg_alloc{}=I|Is], St, Copies0, Acc) -> + Copies = case I of + #cg_alloc{stack=none,words=#need{h=0,f=0}} -> + Copies0; + #cg_alloc{} -> + #{} + end, + prefer_xregs_is(Is, St, Copies, [I|Acc]); +prefer_xregs_is([#cg_set{op=copy,dst=Dst,args=[Src]}=I|Is], St, Copies0, Acc) -> + Copies1 = prefer_xregs_prune(I, Copies0, St), + Copies = case beam_args([Src,Dst], St) of + [Same,Same] -> Copies1; + [_,_] -> Copies1#{Dst=>Src} + end, + prefer_xregs_is(Is, St, Copies, [I|Acc]); +prefer_xregs_is([#cg_set{op=call,dst=Dst}=I0|Is], St, Copies, Acc) -> + I = prefer_xregs_call(I0, Copies, St), + prefer_xregs_is(Is, St, #{Dst=>{x,0}}, [I|Acc]); +prefer_xregs_is([#cg_set{op=make_fun,dst=Dst}=I0|Is], St, Copies, Acc) -> + I = prefer_xregs_call(I0, Copies, St), + prefer_xregs_is(Is, St, #{Dst=>{x,0}}, [I|Acc]); +prefer_xregs_is([#cg_set{op=set_tuple_element}=I|Is], St, Copies, Acc) -> + %% FIXME: HiPE translates the following code segment incorrectly: + %% {call_ext,3,{extfunc,erlang,setelement,3}}. + %% {move,{x,0},{y,3}}. + %% {set_tuple_element,{y,1},{y,3},1}. + %% Therefore, skip the translation of the arguments for set_tuple_element. + prefer_xregs_is(Is, St, Copies, [I|Acc]); +prefer_xregs_is([#cg_set{args=Args0}=I0|Is], St, Copies0, Acc) -> + Args = [do_prefer_xreg(A, Copies0, St) || A <- Args0], + I = I0#cg_set{args=Args}, + Copies = prefer_xregs_prune(I, Copies0, St), + prefer_xregs_is(Is, St, Copies, [I|Acc]); +prefer_xregs_is([], _St, Copies, Acc) -> + {reverse(Acc),Copies}. + +prefer_xregs_terminator(#cg_br{bool=Arg0}=I, Copies, St) -> + Arg = do_prefer_xreg(Arg0, Copies, St), + I#cg_br{bool=Arg}; +prefer_xregs_terminator(#cg_ret{arg=Arg0}=I, Copies, St) -> + Arg = do_prefer_xreg(Arg0, Copies, St), + I#cg_ret{arg=Arg}; +prefer_xregs_terminator(#cg_switch{arg=Arg0}=I, Copies, St) -> + Arg = do_prefer_xreg(Arg0, Copies, St), + I#cg_switch{arg=Arg}. + +prefer_xregs_prune(#cg_set{anno=#{clobbers:=true}}, _, _) -> + #{}; +prefer_xregs_prune(#cg_set{dst=Dst}, Copies, St) -> + DstReg = beam_arg(Dst, St), + F = fun(_, Alias) -> + beam_arg(Alias, St) =/= DstReg + end, + maps:filter(F, Copies). + +%% prefer_xregs_call(Instruction, Copies, St) -> Instruction. +%% Given a 'call' or 'make_fun' instruction, minimize the number +%% of 'move' instructions to set up the argument registers. +%% Prefer using X registers over Y registers, unless that will +%% result in more 'move' instructions. + +prefer_xregs_call(#cg_set{args=[_]}=I, _Copies, _St) -> + I; +prefer_xregs_call(#cg_set{args=[F|Args0]}=I, Copies, St) -> + case Args0 of + [A0] -> + %% Only one argument. Always prefer the X register + %% if available. + A = do_prefer_xreg(A0, Copies, St), + I#cg_set{args=[F,A]}; + [_|_] -> + %% Two or more arguments. Try rewriting arguments in + %% two ways and see which way produces the least + %% number of 'move' instructions. + Args1 = prefer_xregs_call_1(Args0, Copies, 0, St), + Args2 = [do_prefer_xreg(A, Copies, St) || A <- Args0], + case {count_moves(Args1, St),count_moves(Args2, St)} of + {N1,N2} when N1 < N2 -> + %% There will be fewer 'move' instructions if + %% we keep using Y registers. + I#cg_set{args=[F|Args1]}; + {_,_} -> + %% Always use the values in X registers. + I#cg_set{args=[F|Args2]} + end + end. + +count_moves(Args, St) -> + length(setup_args(beam_args(Args, St))). + +prefer_xregs_call_1([#b_var{}=A|As], Copies, X, St) -> + case {beam_arg(A, St),Copies} of + {{y,_},#{A:=Other}} -> + case beam_arg(Other, St) of + {x,X} -> + %% This value is already in the correct X register. + %% It is always benefical to use the X register variable. + [Other|prefer_xregs_call_1(As, Copies, X+1, St)]; + _ -> + %% This value is another X register. Keep using + %% the Y register variable. + [A|prefer_xregs_call_1(As, Copies, X+1, St)] + end; + {_,_} -> + %% The value is not available in an X register. + [A|prefer_xregs_call_1(As, Copies, X+1, St)] + end; +prefer_xregs_call_1([A|As], Copies, X, St) -> + [A|prefer_xregs_call_1(As, Copies, X+1, St)]; +prefer_xregs_call_1([], _, _, _) -> []. + +do_prefer_xreg(#b_var{}=A, Copies, St) -> + case {beam_arg(A, St),Copies} of + {{y,_},#{A:=Copy}} -> + Copy; + {_,_} -> + A + end; +do_prefer_xreg(A, _, _) -> A. + +merge_copies(Copies0, Copies1) when map_size(Copies0) =< map_size(Copies1) -> + maps:filter(fun(K, V) -> + case Copies1 of + #{K:=V} -> true; + #{} -> false + end + end, Copies0); +merge_copies(Copies0, Copies1) -> + merge_copies(Copies1, Copies0). + + +%%% +%%% Add annotations for the number of live registers. +%%% + +liveness(Linear, #cg{regs=Regs}) -> + liveness(reverse(Linear), #{}, Regs, []). + +liveness([{L,#cg_blk{is=Is0,last=Last0}=Blk0}|Bs], LiveMap0, Regs, Acc) -> + Successors = liveness_successors(Last0), + Live0 = ordsets:union([liveness_get(S, LiveMap0) || S <- Successors]), + Live1 = liveness_terminator(Last0, Live0), + {Is,Live} = liveness_is(reverse(Is0), Regs, Live1, []), + LiveMap = LiveMap0#{L=>Live}, + Blk = Blk0#cg_blk{is=Is}, + liveness(Bs, LiveMap, Regs, [{L,Blk}|Acc]); +liveness([], _LiveMap, _Regs, Acc) -> Acc. + +liveness_get(S, LiveMap) -> + case LiveMap of + #{S:=Live} -> Live; + #{} -> [] + end. + +liveness_successors(Terminator) -> + successors(Terminator) -- [?BADARG_BLOCK]. + +liveness_is([#cg_alloc{}=I0|Is], Regs, Live, Acc) -> + I = I0#cg_alloc{live=num_live(Live, Regs)}, + liveness_is(Is, Regs, Live, [I|Acc]); +liveness_is([#cg_set{dst=Dst0,args=Args}=I0|Is], Regs, Live0, Acc) -> + #b_var{name=Dst} = Dst0, + Live1 = liveness_clobber(I0, Live0, Regs), + I1 = liveness_yregs_anno(I0, Live1, Regs), + Live2 = liveness_args(Args, Live1), + Live = ordsets:del_element(Dst, Live2), + I = liveness_anno(I1, Live, Regs), + liveness_is(Is, Regs, Live, [I|Acc]); +liveness_is([], _, Live, Acc) -> + {Acc,Live}. + +liveness_terminator(#cg_br{bool=Arg}, Live) -> + liveness_terminator_1(Arg, Live); +liveness_terminator(#cg_switch{arg=Arg}, Live) -> + liveness_terminator_1(Arg, Live); +liveness_terminator(#cg_ret{arg=Arg}, Live) -> + liveness_terminator_1(Arg, Live). + +liveness_terminator_1(#b_var{name=V}, Live) -> + ordsets:add_element(V, Live); +liveness_terminator_1(#b_literal{}, Live) -> + Live; +liveness_terminator_1(Reg, Live) -> + _ = verify_beam_register(Reg), + ordsets:add_element(Reg, Live). + +liveness_args([#b_var{name=V}|As], Live) -> + liveness_args(As, ordsets:add_element(V, Live)); +liveness_args([#b_remote{mod=Mod,name=Name}|As], Live) -> + liveness_args([Mod,Name|As], Live); +liveness_args([A|As], Live) -> + case is_beam_register(A) of + true -> + liveness_args(As, ordsets:add_element(A, Live)); + false -> + liveness_args(As, Live) + end; +liveness_args([], Live) -> Live. + +liveness_anno(#cg_set{op=Op}=I, Live, Regs) -> + case need_live_anno(Op) of + true -> + NumLive = num_live(Live, Regs), + Anno = (I#cg_set.anno)#{live=>NumLive}, + I#cg_set{anno=Anno}; + false -> + I + end. + +liveness_yregs_anno(#cg_set{op=Op,dst=#b_var{name=Dst}}=I, Live0, Regs) -> + case need_live_anno(Op) of + true -> + Live = ordsets:del_element(Dst, Live0), + LiveYregs = [V || V <- Live, is_yreg(V, Regs)], + Anno = (I#cg_set.anno)#{live_yregs=>LiveYregs}, + I#cg_set{anno=Anno}; + false -> + I + end. + +liveness_clobber(#cg_set{anno=Anno}, Live, Regs) -> + case Anno of + #{clobbers:=true} -> + [R || R <- Live, is_yreg(R, Regs)]; + _ -> + Live + end. + +is_yreg(R, Regs) -> + case Regs of + #{R:={y,_}} -> true; + #{} -> false + end. + +num_live(Live, Regs) -> + Rs = ordsets:from_list([get_register(V, Regs) || V <- Live]), + num_live_1(Rs, 0). + +num_live_1([{x,X}|T], X) -> + num_live_1(T, X+1); +num_live_1([{x,_}|_]=T, X) -> + %% error({hole,{x,X},expected,Next}); + num_live_1(T, X+1); +num_live_1([{y,_}|_], X) -> + X; +num_live_1([{z,_}|_], X) -> + X; +num_live_1([{fr,_}|T], X) -> + num_live_1(T, X); +num_live_1([], X) -> + X. + +get_live(#cg_set{anno=#{live:=Live}}) -> + Live. + +%% need_live_anno(Operation) -> true|false. +%% Return 'true' if the instruction needs a 'live' annotation with +%% the number live X registers, or 'false' otherwise. + +need_live_anno(Op) -> + case Op of + {bif,_} -> true; + bs_get -> true; + bs_init -> true; + bs_start_match -> true; + bs_skip -> true; + call -> true; + put_map -> true; + _ -> false + end. + +%%% +%%% Add annotations for defined Y registers. +%%% + +defined(Linear, #cg{regs=Regs}) -> + def(Linear, #{}, Regs). + +def([{L,#cg_blk{is=Is0,last=Last}=Blk0}|Bs], DefMap0, Regs) -> + Def0 = def_get(L, DefMap0), + {Is,Def} = def_is(Is0, Regs, Def0, []), + Successors = successors(Last), + DefMap = def_successors(Successors, Def, DefMap0), + Blk = Blk0#cg_blk{is=Is}, + [{L,Blk}|def(Bs, DefMap, Regs)]; +def([], _, _) -> []. + +def_get(L, DefMap) -> + case DefMap of + #{L:=Def} -> Def; + #{} -> [] + end. + +def_is([#cg_alloc{anno=Anno0}=I0|Is], Regs, Def, Acc) -> + I = I0#cg_alloc{anno=Anno0#{def_yregs=>Def}}, + def_is(Is, Regs, Def, [I|Acc]); +def_is([#cg_set{op=kill_try_tag,args=[#b_var{name=Tag}]}=I|Is], Regs, Def0, Acc) -> + Def = ordsets:del_element(Tag, Def0), + def_is(Is, Regs, Def, [I|Acc]); +def_is([#cg_set{op=catch_end,args=[#b_var{name=Tag}|_]}=I|Is], Regs, Def0, Acc) -> + Def = ordsets:del_element(Tag, Def0), + def_is(Is, Regs, Def, [I|Acc]); +def_is([#cg_set{anno=Anno0,op=call,dst=#b_var{name=Dst}}=I0|Is], + Regs, Def0, Acc) -> + #{live_yregs:=LiveYregVars} = Anno0, + LiveRegs = gb_sets:from_list([maps:get(V, Regs) || V <- LiveYregVars]), + Kill0 = ordsets:subtract(Def0, LiveYregVars), + + %% Kill0 is the set of variables that have just died. However, the registers + %% used for killed variables may have been reused, so we must check that the + %% registers to be killed are not used by other variables. + Kill = [K || K <- Kill0, not gb_sets:is_element(maps:get(K, Regs), LiveRegs)], + Anno = Anno0#{def_yregs=>Def0,kill_yregs=>Kill}, + I = I0#cg_set{anno=Anno}, + Def1 = ordsets:subtract(Def0, Kill), + Def = def_add_yreg(Dst, Def1, Regs), + def_is(Is, Regs, Def, [I|Acc]); +def_is([#cg_set{anno=Anno0,op={bif,Bif},dst=#b_var{name=Dst},args=Args}=I0|Is], + Regs, Def0, Acc) -> + Arity = length(Args), + I = case is_gc_bif(Bif, Args) orelse not erl_bifs:is_safe(erlang, Bif, Arity) of + true -> + I0#cg_set{anno=Anno0#{def_yregs=>Def0}}; + false -> + I0 + end, + Def = def_add_yreg(Dst, Def0, Regs), + def_is(Is, Regs, Def, [I|Acc]); +def_is([#cg_set{anno=Anno0,dst=#b_var{name=Dst}}=I0|Is], Regs, Def0, Acc) -> + I = case need_y_init(I0) of + true -> + I0#cg_set{anno=Anno0#{def_yregs=>Def0}}; + false -> + I0 + end, + Def = def_add_yreg(Dst, Def0, Regs), + def_is(Is, Regs, Def, [I|Acc]); +def_is([], _, Def, Acc) -> + {reverse(Acc),Def}. + +def_add_yreg(Dst, Def, Regs) -> + case is_yreg(Dst, Regs) of + true -> ordsets:add_element(Dst, Def); + false -> Def + end. + +def_successors([S|Ss], Def0, DefMap) -> + case DefMap of + #{S:=Def1} -> + Def = ordsets:intersection(Def0, Def1), + def_successors(Ss, Def0, DefMap#{S:=Def}); + #{} -> + def_successors(Ss, Def0, DefMap#{S=>Def0}) + end; +def_successors([], _, DefMap) -> DefMap. + +%% need_y_init(#cg_set{}) -> true|false. +%% Return true if this instructions needs initialized Y registers +%% (because the instruction may do a GC or cause an exception +%% so that the stack will be scanned), or false otherwise. + +need_y_init(#cg_set{anno=#{clobbers:=Clobbers}}) -> Clobbers; +need_y_init(#cg_set{op=bs_get}) -> true; +need_y_init(#cg_set{op=bs_init}) -> true; +need_y_init(#cg_set{op=bs_skip,args=[#b_literal{val=Type}|_]}) -> + case Type of + utf8 -> true; + utf16 -> true; + utf32 -> true; + _ -> false + end; +need_y_init(#cg_set{op=bs_start_match}) -> true; +need_y_init(#cg_set{op=put_map}) -> true; +need_y_init(#cg_set{}) -> false. + +%% opt_allocate([{BlockLabel,Block}], #st{}) -> [BeamInstruction]. +%% Update the def_yregs field of each #cg_alloc{} that allocates +%% a stack frame. #cg_alloc.def_yregs will list all Y registers +%% that will be initialized by the subsequent code (thus, the +%% listed Y registers don't require init/1 instructions). + +opt_allocate(Linear, #cg{regs=Regs}) -> + opt_allocate_1(Linear, Regs). + +opt_allocate_1([{L,#cg_blk{is=[#cg_alloc{stack=Stk}=I0|Is]}=Blk0}|Bs]=Bs0, Regs) + when is_integer(Stk) -> + Yregs = opt_alloc_def(Bs0, gb_sets:singleton(L), []), + I = I0#cg_alloc{def_yregs=Yregs}, + [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)]; +opt_allocate_1([B|Bs], Regs) -> + [B|opt_allocate_1(Bs, Regs)]; +opt_allocate_1([], _) -> []. + +opt_alloc_def([{L,#cg_blk{is=Is,last=Last}}|Bs], Ws0, Def0) -> + case gb_sets:is_member(L, Ws0) of + false -> + opt_alloc_def(Bs, Ws0, Def0); + true -> + case opt_allocate_is(Is) of + none -> + Succ = successors(Last), + Ws = gb_sets:union(Ws0, gb_sets:from_list(Succ)), + opt_alloc_def(Bs, Ws, Def0); + Def1 when is_list(Def1) -> + Def = [Def1|Def0], + opt_alloc_def(Bs, Ws0, Def) + end + end; +opt_alloc_def([], _, Def) -> + ordsets:intersection(Def). + +opt_allocate_is([#cg_set{anno=Anno}|Is]) -> + case Anno of + #{def_yregs:=Yregs} -> + Yregs; + #{} -> + opt_allocate_is(Is) + end; +opt_allocate_is([#cg_alloc{anno=#{def_yregs:=Yregs},stack=none}|_]) -> + Yregs; +opt_allocate_is([#cg_alloc{}|Is]) -> + opt_allocate_is(Is); +opt_allocate_is([]) -> none. + +%%% +%%% Here follows the main code generation functions. +%%% + +%% cg_linear([{BlockLabel,Block}]) -> [BeamInstruction]. +%% Generate BEAM instructions. + +cg_linear([{L,#cg_blk{anno=#{recv_set:=L}=Anno0}=B0}|Bs], St0) -> + Anno = maps:remove(recv_set, Anno0), + B = B0#cg_blk{anno=Anno}, + {Is,St1} = cg_linear([{L,B}|Bs], St0), + {Fail,St} = use_block_label(L, St1), + {[{recv_set,Fail}|Is],St}; +cg_linear([{L,#cg_blk{is=Is0,last=Last}}|Bs], St0) -> + Next = next_block(Bs), + St1 = new_block_label(L, St0), + {Is1,St2} = cg_block(Is0, Last, Next, St1), + {Is2,St} = cg_linear(Bs, St2), + {def_block_label(L, St)++Is1++Is2,St}; +cg_linear([], St) -> {[],St}. + +cg_block([#cg_set{op=recv_next}], #cg_br{succ=Lr0}, _Next, St0) -> + {Lr,St} = use_block_label(Lr0, St0), + {[{loop_rec_end,Lr}],St}; +cg_block([#cg_set{op=wait}], #cg_br{succ=Lr0}, _Next, St0) -> + {Lr,St} = use_block_label(Lr0, St0), + {[{wait,Lr}],St}; +cg_block(Is0, Last, Next, St0) -> + case Last of + #cg_br{succ=Next,fail=Next} -> + cg_block(Is0, none, St0); + #cg_br{succ=Same,fail=Same} -> + {Fail,St1} = use_block_label(Same, St0), + {Is,St} = cg_block(Is0, none, St1), + {Is++[jump(Fail)],St}; + #cg_br{bool=Bool,succ=Next,fail=Fail0} -> + {Fail,St1} = use_block_label(Fail0, St0), + {Is,St} = cg_block(Is0, {Bool,Fail}, St1), + {Is,St}; + #cg_br{bool=Bool,succ=Succ0,fail=Fail0} -> + {[Succ,Fail],St1} = use_block_labels([Succ0,Fail0], St0), + {Is,St} = cg_block(Is0, {Bool,Fail}, St1), + {Is++[jump(Succ)],St}; + #cg_ret{arg=Src0,dealloc=N} -> + Src = beam_arg(Src0, St0), + cg_block(Is0, {return,Src,N}, St0); + #cg_switch{} -> + cg_switch(Is0, Last, St0) + end. + +cg_switch(Is0, Last, St0) -> + #cg_switch{arg=Src0,fail=Fail0,list=List0} = Last, + Src = beam_arg(Src0, St0), + {Fail1,St1} = use_block_label(Fail0, St0), + Fail = ensure_label(Fail1, St1), + {List1,St2} = + flatmapfoldl(fun({V,L}, S0) -> + {Lbl,S} = use_block_label(L, S0), + {[beam_arg(V, S),Lbl],S} + end, St1, List0), + {Is1,St} = cg_block(Is0, none, St2), + case reverse(Is1) of + [{bif,tuple_size,_,[Tuple],{z,_}=Src}|More] -> + List = map(fun({integer,Arity}) -> Arity; + ({f,_}=F) -> F + end, List1), + Is = reverse(More, [{select_tuple_arity,Tuple,Fail,{list,List}}]), + {Is,St}; + _ -> + SelectVal = {select_val,Src,Fail,{list,List1}}, + {Is1 ++ [SelectVal],St} + end. + +jump({f,_}=Fail) -> + {jump,Fail}; +jump({catch_tag,Fail}) -> + {jump,Fail}. + +bif_fail({f,_}=Fail) -> Fail; +bif_fail({catch_tag,_}) -> {f,0}. + +next_block([]) -> none; +next_block([{Next,_}|_]) -> Next. + +ensure_label(Fail0, #cg{ultimate_fail=Lbl}) -> + case bif_fail(Fail0) of + {f,0} -> {f,Lbl}; + {f,_}=Fail -> Fail + end. + +cg_block([#cg_set{anno=#{recv_mark:=L}=Anno0}=I0|T], Context, St0) -> + Anno = maps:remove(recv_mark, Anno0), + I = I0#cg_set{anno=Anno}, + {Is,St1} = cg_block([I|T], Context, St0), + {Fail,St} = use_block_label(L, St1), + {[{recv_mark,Fail}|Is],St}; +cg_block([#cg_set{op=new_try_tag,dst=Tag,args=Args}], {Tag,Fail0}, St) -> + {catch_tag,Fail} = Fail0, + [Reg,{atom,Kind}] = beam_args([Tag|Args], St), + {[{Kind,Reg,Fail}],St}; +cg_block([#cg_set{anno=Anno,op={bif,Name},dst=Dst0,args=Args0}=I, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) -> + [Dst|Args] = beam_args([Dst0|Args0], St), + Line0 = call_line(body, {extfunc,erlang,Name,length(Args)}, Anno), + Fail = bif_fail(Fail0), + Line = case Fail of + {f,0} -> Line0; + {f,_} -> [] + end, + case is_gc_bif(Name, Args) of + true -> + Live = get_live(I), + Kill = kill_yregs(Anno, St), + {Kill++Line++[{gc_bif,Name,Fail,Live,Args,Dst}],St}; + false -> + {Line++[{bif,Name,Fail,Args,Dst}],St} + end; +cg_block([#cg_set{op={bif,tuple_size},dst=Arity0,args=[Tuple0]}, + #cg_set{op={bif,'=:='},dst=Bool,args=[Arity0,#b_literal{val=Ar}]}=Eq], + {Bool,Fail}=Context, St0) -> + Tuple = beam_arg(Tuple0, St0), + case beam_arg(Arity0, St0) of + {z,_} -> + %% The size will only be used once. Combine to a test_arity instruction. + Test = {test,test_arity,ensure_label(Fail, St0),[Tuple,Ar]}, + {[Test],St0}; + Arity -> + %% The size will be used more than once. Must do an explicit + %% BIF call followed by the '==' test. + TupleSize = {bif,tuple_size,{f,0},[Tuple],Arity}, + {Is,St} = cg_block([Eq], Context, St0), + {[TupleSize|Is],St} + end; +cg_block([#cg_set{op={bif,Name},dst=Dst0,args=Args0}]=Is0, {Dst0,Fail}, St0) -> + [Dst|Args] = beam_args([Dst0|Args0], St0), + case Dst of + {z,_} -> + %% The result of the BIF call will only be used once. Convert to + %% a test instruction. + Test = bif_to_test(Name, Args, ensure_label(Fail, St0)), + {Test,St0}; + _ -> + %% Must explicitly call the BIF since the result will be used + %% more than once. + {Is1,St1} = cg_block(Is0, none, St0), + {Is2,St} = cg_block([], {Dst0,Fail}, St1), + {Is1++Is2,St} + end; +cg_block([#cg_set{anno=Anno,op={bif,Name},dst=Dst0,args=Args0}=I|T], + Context, St0) -> + [Dst|Args] = beam_args([Dst0|Args0], St0), + {Is0,St} = cg_block(T, Context, St0), + case is_gc_bif(Name, Args) of + true -> + Line = call_line(body, {extfunc,erlang,Name,length(Args)}, Anno), + Live = get_live(I), + Kill = kill_yregs(Anno, St), + Is = Kill++Line++[{gc_bif,Name,{f,0},Live,Args,Dst}|Is0], + {Is,St}; + false -> + Is = [{bif,Name,{f,0},Args,Dst}|Is0], + {Is,St} + end; +cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) -> + Fail = bif_fail(Fail0), + Line = line(Anno), + [#b_literal{val=Kind}|Args1] = Args0, + case Kind of + new -> + [Dst,Size,{integer,Unit}] = beam_args([Dst0|Args1], St), + Live = get_live(I), + {[Line|cg_bs_init(Dst, Size, Unit, Live, Fail)],St}; + private_append -> + [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St), + Flags = {field_flags,[]}, + Is = [Line,{bs_private_append,Fail,Bits,Unit,Src,Flags,Dst}], + {Is,St}; + append -> + [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St), + Flags = {field_flags,[]}, + Live = get_live(I), + Is = [Line,{bs_append,Fail,Bits,0,Live,Unit,Src,Flags,Dst}], + {Is,St} + end; +cg_block([#cg_set{anno=Anno,op=bs_start_match,dst=Ctx0,args=[Bin0]}=I, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) -> + #{num_slots:=Slots} = Anno, + [Dst,Bin1] = beam_args([Ctx0,Bin0], St), + {Bin,Pre} = force_reg(Bin1, Dst), + Live = get_live(I), + Is = Pre ++ [{test,bs_start_match2,Fail,Live,[Bin,Slots],Dst}], + {Is,St}; +cg_block([#cg_set{op=bs_get}=Set, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) -> + {cg_bs_get(Fail, Set, St),St}; +cg_block([#cg_set{op=bs_match_string,args=[CtxVar,#b_literal{val=String}]}, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) -> + CtxReg = beam_arg(CtxVar, St), + Is = [{test,bs_match_string,Fail,[CtxReg,String]}], + {Is,St}; +cg_block([#cg_set{dst=Dst0,op=landingpad,args=Args0}|T], Context, St0) -> + [Dst,{atom,Kind},Tag] = beam_args([Dst0|Args0], St0), + case Kind of + 'catch' -> + cg_catch(Dst, T, Context, St0); + 'try' -> + cg_try(Dst, Tag, T, Context, St0) + end; +cg_block([#cg_set{op=kill_try_tag,args=Args0}|Is], Context, St0) -> + [Reg] = beam_args(Args0, St0), + {Is0,St} = cg_block(Is, Context, St0), + {[{try_end,Reg}|Is0],St}; +cg_block([#cg_set{op=catch_end,dst=Dst0,args=Args0}|Is], Context, St0) -> + [Dst,Reg,{x,0}] = beam_args([Dst0|Args0], St0), + {Is0,St} = cg_block(Is, Context, St0), + {[{catch_end,Reg}|copy({x,0}, Dst)++Is0],St}; +cg_block([#cg_set{op=call}=I, + #cg_set{op=succeeded,dst=Bool}], {Bool,_Fail}, St) -> + %% A call in try/catch block. + cg_block([I], none, St); +cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=I, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) -> + [Dst|Args] = beam_args([Dst0|Args0], St), + {cg_test(Op, bif_fail(Fail), Args, Dst, I),St}; +cg_block([#cg_set{op=bs_put,dst=Bool,args=Args0}], {Bool,Fail}, St) -> + Args = beam_args(Args0, St), + {cg_bs_put(bif_fail(Fail), Args),St}; +cg_block([#cg_set{op=bs_test_tail,dst=Bool,args=Args0}], {Bool,Fail}, St) -> + [Ctx,{integer,Bits}] = beam_args(Args0, St), + {[{test,bs_test_tail2,bif_fail(Fail),[Ctx,Bits]}],St}; +cg_block([#cg_set{op={float,checkerror},dst=Bool}], {Bool,Fail}, St) -> + {[{fcheckerror,bif_fail(Fail)}],St}; +cg_block([#cg_set{op=is_tagged_tuple,dst=Bool,args=Args0}], {Bool,Fail}, St) -> + [Src,{integer,Arity},Tag] = beam_args(Args0, St), + {[{test,is_tagged_tuple,ensure_label(Fail, St),[Src,Arity,Tag]}],St}; +cg_block([#cg_set{op=is_nonempty_list,dst=Bool,args=Args0}], {Bool,Fail}, St) -> + Args = beam_args(Args0, St), + {[{test,is_nonempty_list,ensure_label(Fail, St),Args}],St}; +cg_block([#cg_set{op=has_map_field,dst=Bool,args=Args0}], {Bool,Fail}, St) -> + [Src,Key] = beam_args(Args0, St), + {[{test,has_map_fields,Fail,Src,{list,[Key]}}],St}; +cg_block([#cg_set{op=call}=Call], {_Bool,_Fail}=Context, St0) -> + {Is0,St1} = cg_call(Call, body, none, St0), + {Is1,St} = cg_block([], Context, St1), + {Is0++Is1,St}; +cg_block([#cg_set{op=call,dst=Dst0}=Call], Context, St) -> + Dst = beam_arg(Dst0, St), + case Context of + {return,Dst,_} -> + cg_call(Call, tail, Context, St); + _ -> + cg_call(Call, body, Context, St) + end; +cg_block([#cg_set{op=call}=Call|T], Context, St0) -> + {Is0,St1} = cg_call(Call, body, none, St0), + {Is1,St} = cg_block(T, Context, St1), + {Is0++Is1,St}; +cg_block([#cg_set{op=make_fun,dst=Dst0,args=[Local|Args0]}|T], + Context, St0) -> + #b_local{name=#b_literal{val=Func},arity=Arity} = Local, + [Dst|Args] = beam_args([Dst0|Args0], St0), + {FuncLbl,St1} = local_func_label(Func, Arity, St0), + Is0 = setup_args(Args) ++ + [{make_fun2,{f,FuncLbl},0,0,length(Args)}|copy({x,0}, Dst)], + {Is1,St} = cg_block(T, Context, St1), + {Is0++Is1,St}; +cg_block([#cg_set{op=copy}|_]=T0, Context, St0) -> + {Is0,T} = cg_copy(T0, St0), + {Is1,St} = cg_block(T, Context, St0), + Is = Is0 ++ Is1, + case is_call(T) of + {yes,Arity} -> + {opt_call_moves(Is, Arity),St}; + no -> + {Is,St} + end; +cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=Set], none, St) -> + [Dst|Args] = beam_args([Dst0|Args0], St), + Is = cg_instr(Op, Args, Dst, Set), + {Is,St}; +cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=Set|T], Context, St0) -> + [Dst|Args] = beam_args([Dst0|Args0], St0), + Is0 = cg_instr(Op, Args, Dst, Set), + {Is1,St} = cg_block(T, Context, St0), + {Is0++Is1,St}; +cg_block([#cg_alloc{}=Alloc|T], Context, St0) -> + Is0 = cg_alloc(Alloc, St0), + {Is1,St} = cg_block(T, Context, St0), + {Is0++Is1,St}; +cg_block([], {return,Arg,none}, St) -> + Is = copy(Arg, {x,0}) ++ [return], + {Is,St}; +cg_block([], {return,Arg,N}, St) -> + Is = copy(Arg, {x,0}) ++ [{deallocate,N},return], + {Is,St}; +cg_block([], none, St) -> + {[],St}; +cg_block([], {Bool0,Fail}, St) -> + [Bool] = beam_args([Bool0], St), + {[{test,is_eq_exact,Fail,[Bool,{atom,true}]}],St}. + +cg_copy(T0, St) -> + {Copies,T} = splitwith(fun(#cg_set{op=copy}) -> true; + (_) -> false + end, T0), + Moves0 = cg_copy_1(Copies, St), + Moves1 = [Move || {move,Src,Dst}=Move <- Moves0, Src =/= Dst], + Scratch = {x,1022}, + Moves = order_moves(Moves1, Scratch), + {Moves,T}. + +cg_copy_1([#cg_set{dst=Dst0,args=Args}|T], St) -> + [Dst,Src] = beam_args([Dst0|Args], St), + Copies = cg_copy_1(T, St), + case keymember(Dst, 3, Copies) of + true -> + %% Will be overwritten. Don't generate a move instruction. + Copies; + false -> + [{move,Src,Dst}|Copies] + end; +cg_copy_1([], _St) -> []. + +bif_to_test('not', [Var], Fail) -> + [{test,is_eq_exact,Fail,[Var,{atom,false}]}]; +bif_to_test(Name, Args, Fail) -> + [beam_utils:bif_to_test(Name, Args, Fail)]. + +opt_call_moves(Is0, Arity) -> + {Moves0,Is} = splitwith(fun({move,_,_}) -> true; + (_) -> false + end, Is0), + Moves = opt_call_moves_1(Moves0, Arity), + Moves ++ Is. + +opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1,{move,Tmp,Dst}=M2|Is], Arity) -> + case is_killed(Tmp, Is, Arity) of + true -> + %% The X register Tmp is never used again. We can collapse + %% the two move instruction into one. + [{move,Src,Dst}|opt_call_moves_1(Is, Arity)]; + false -> + [M1|opt_call_moves_1([M2|Is], Arity)] + end; +opt_call_moves_1([M|Ms], Arity) -> + [M|opt_call_moves_1(Ms, Arity)]; +opt_call_moves_1([], _Arity) -> []. + +is_killed(R, [{move,R,_}|_], _) -> + false; +is_killed(R, [{move,_,R}|_], _) -> + true; +is_killed(R, [{move,_,_}|Is], Arity) -> + is_killed(R, Is, Arity); +is_killed({x,X}, [], Arity) -> + X >= Arity. + +cg_alloc(#cg_alloc{stack=none,words=#need{h=0,f=0}}, _St) -> + []; +cg_alloc(#cg_alloc{stack=none,words=Need,live=Live}, _St) -> + [{test_heap,alloc(Need),Live}]; +cg_alloc(#cg_alloc{stack=Stk,words=Need,live=Live,def_yregs=DefYregs}, + #cg{regs=Regs}) when is_integer(Stk) -> + Alloc = alloc(Need), + All = [{y,Y} || Y <- lists:seq(0, Stk-1)], + Def = ordsets:from_list([maps:get(V, Regs) || V <- DefYregs]), + NeedInit = ordsets:subtract(All, Def), + NoZero = length(Def)*2 > Stk, + I = case {NoZero,Alloc} of + {true,0} -> {allocate,Stk,Live}; + {true,_} -> {allocate_heap,Stk,Alloc,Live}; + {false,0} -> {allocate_zero,Stk,Live}; + {false,_} -> {allocate_heap_zero,Stk,Alloc,Live} + end, + [I|case NoZero of + true -> [{init,Y} || Y <- NeedInit]; + false -> [] + end]. + +alloc(#need{h=Words,f=0}) -> + Words; +alloc(#need{h=Words,f=Floats}) -> + {alloc,[{words,Words},{floats,Floats}]}. + +is_call([#cg_set{op=call,args=[#b_var{}|Args]}|_]) -> + {yes,1+length(Args)}; +is_call([#cg_set{op=call,args=[_|Args]}|_]) -> + {yes,length(Args)}; +is_call([#cg_set{op=make_fun,args=[_|Args]}|_]) -> + {yes,length(Args)}; +is_call(_) -> + no. + +cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=[#b_local{}=Func0|Args0]}, + Where, Context, St0) -> + [Dst|Args] = beam_args([Dst0|Args0], St0), + #b_local{name=Name0,arity=Arity} = Func0, + {atom,Name} = beam_arg(Name0, St0), + {FuncLbl,St} = local_func_label(Name, Arity, St0), + Line = call_line(Where, local, Anno), + Call = build_call(call, Arity, {f,FuncLbl}, Context, Dst), + Is = setup_args(Args, Anno, Context, St) ++ Line ++ Call, + {Is,St}; +cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=[#b_remote{}=Func0|Args0]}, + Where, Context, St) -> + [Dst|Args] = beam_args([Dst0|Args0], St), + #b_remote{mod=Mod0,name=Name0,arity=Arity} = Func0, + case {beam_arg(Mod0, St),beam_arg(Name0, St)} of + {{atom,Mod},{atom,Name}} -> + Func = {extfunc,Mod,Name,Arity}, + Line = call_line(Where, Func, Anno), + Call = build_call(call_ext, Arity, Func, Context, Dst), + Is = setup_args(Args, Anno, Context, St) ++ Line ++ Call, + {Is,St}; + {Mod,Name} -> + Apply = build_apply(Arity, Context, Dst), + Is = setup_args(Args++[Mod,Name], Anno, Context, St) ++ + [line(Anno)] ++ Apply, + {Is,St} + end; +cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=Args0}, + Where, Context, St) -> + [Dst,Func|Args] = beam_args([Dst0|Args0], St), + Line = call_line(Where, Func, Anno), + Arity = length(Args), + Call = build_call(call_fun, Arity, Func, Context, Dst), + Is = setup_args(Args++[Func], Anno, Context, St) ++ Line ++ Call, + {Is,St}. + +build_call(call_fun, Arity, _Func, none, Dst) -> + [{call_fun,Arity}|copy({x,0}, Dst)]; +build_call(call_fun, Arity, _Func, {return,Dst,N}, Dst) when is_integer(N) -> + [{call_fun,Arity},{deallocate,N},return]; +build_call(call_fun, Arity, _Func, {return,Val,N}, _Dst) when is_integer(N) -> + [{call_fun,Arity},{move,Val,{x,0}},{deallocate,N},return]; +build_call(call_ext, 2, {extfunc,erlang,'!',2}, none, Dst) -> + [send|copy({x,0}, Dst)]; +build_call(call_ext, 2, {extfunc,erlang,'!',2}, {return,Dst,N}, Dst) + when is_integer(N) -> + [send,{deallocate,N},return]; +build_call(Prefix, Arity, Func, {return,Dst,none}, Dst) -> + I = case Prefix of + call -> call_only; + call_ext -> call_ext_only + end, + [{I,Arity,Func}]; +build_call(call_ext, Arity, {extfunc,Mod,Name,Arity}=Func, {return,_,none}, _Dst) -> + true = erl_bifs:is_exit_bif(Mod, Name, Arity), %Assertion. + [{call_ext_only,Arity,Func}]; +build_call(Prefix, Arity, Func, {return,Dst,N}, Dst) when is_integer(N) -> + I = case Prefix of + call -> call_last; + call_ext -> call_ext_last + end, + [{I,Arity,Func,N}]; +build_call(I, Arity, Func, {return,Val,N}, _Dst) when is_integer(N) -> + [{I,Arity,Func}|copy(Val, {x,0})++[{deallocate,N},return]]; +build_call(I, Arity, Func, none, Dst) -> + [{I,Arity,Func}|copy({x,0}, Dst)]. + +build_apply(Arity, {return,Dst,N}, Dst) when is_integer(N) -> + [{apply_last,Arity,N}]; +build_apply(Arity, {return,Val,N}, _Dst) when is_integer(N) -> + [{apply,Arity}|copy(Val, {x,0})++[{deallocate,N},return]]; +build_apply(Arity, none, Dst) -> + [{apply,Arity}|copy({x,0}, Dst)]. + +cg_instr(put_map, [{atom,assoc},SrcMap|Ss], Dst, Set) -> + Live = get_live(Set), + [{put_map_assoc,{f,0},SrcMap,Dst,Live,{list,Ss}}]; +cg_instr(Op, Args, Dst, _Set) -> + cg_instr(Op, Args, Dst). + +cg_instr(bs_init_writable, Args, Dst) -> + setup_args(Args) ++ [bs_init_writable|copy({x,0}, Dst)]; +cg_instr(bs_restore, [Ctx,Slot], _Dst) -> + case Slot of + {integer,N} -> + [{bs_restore2,Ctx,N}]; + {atom,start} -> + [{bs_restore2,Ctx,Slot}] + end; +cg_instr(bs_save, [Ctx,Slot], _Dst) -> + {integer,N} = Slot, + [{bs_save2,Ctx,N}]; +cg_instr(build_stacktrace, Args, Dst) -> + setup_args(Args) ++ [build_stacktrace|copy({x,0}, Dst)]; +cg_instr(context_to_binary, [Src], _Dst) -> + [{bs_context_to_binary,Src}]; +cg_instr(set_tuple_element=Op, [New,Tuple,{integer,Index}], _Dst) -> + [{Op,New,Tuple,Index}]; +cg_instr({float,clearerror}, [], _Dst) -> + [fclearerror]; +cg_instr({float,get}, [Src], Dst) -> + [{fmove,Src,Dst}]; +cg_instr({float,put}, [Src], Dst) -> + [{fmove,Src,Dst}]; +cg_instr(get_hd=Op, [Src], Dst) -> + [{Op,Src,Dst}]; +cg_instr(get_tl=Op, [Src], Dst) -> + [{Op,Src,Dst}]; +cg_instr(get_tuple_element=Op, [Src,{integer,N}], Dst) -> + [{Op,Src,N,Dst}]; +cg_instr(put_list=Op, [Hd,Tl], Dst) -> + [{Op,Hd,Tl,Dst}]; +cg_instr(put_tuple_arity, [{integer,Arity}], Dst) -> + [{put_tuple,Arity,Dst}]; +cg_instr(put_tuple_elements, Elements, _Dst) -> + [{put,E} || E <- Elements]; +cg_instr(raw_raise, Args, Dst) -> + setup_args(Args) ++ [raw_raise|copy({x,0}, Dst)]; +cg_instr(remove_message, [], _Dst) -> + [remove_message]; +cg_instr(resume, [A,B], _Dst) -> + [{bif,raise,{f,0},[A,B],{x,0}}]; +cg_instr(timeout, [], _Dst) -> + [timeout]. + +cg_test(bs_add=Op, Fail, [Src1,Src2,{integer,Unit}], Dst, _I) -> + [{Op,Fail,[Src1,Src2,Unit],Dst}]; +cg_test(bs_skip, Fail, Args, _Dst, I) -> + cg_bs_skip(Fail, Args, I); +cg_test(bs_utf8_size=Op, Fail, [Src], Dst, _I) -> + [{Op,Fail,Src,Dst}]; +cg_test(bs_utf16_size=Op, Fail, [Src], Dst, _I) -> + [{Op,Fail,Src,Dst}]; +cg_test({float,convert}, Fail, [Src], Dst, _I) -> + {f,0} = Fail, %Assertion. + [{fconv,Src,Dst}]; +cg_test({float,Op0}, Fail, Args, Dst, #cg_set{anno=Anno}) -> + Op = case Op0 of + '+' -> fadd; + '-' when length(Args) =:= 2 -> fsub; + '-' -> fnegate; + '*' -> fmul; + '/' -> fdiv + end, + [line(Anno),{bif,Op,Fail,Args,Dst}]; +cg_test(get_map_element, Fail, [Map,Key], Dst, _I) -> + [{get_map_elements,Fail,Map,{list,[Key,Dst]}}]; +cg_test(peek_message, Fail, [], Dst, _I) -> + [{loop_rec,Fail,{x,0}}|copy({x,0}, Dst)]; +cg_test(put_map, Fail, [{atom,exact},SrcMap|Ss], Dst, Set) -> + Live = get_live(Set), + [{put_map_exact,Fail,SrcMap,Dst,Live,{list,Ss}}]; +cg_test(wait_timeout, Fail, [Timeout], _Dst, _) -> + case Timeout of + {atom,infinity} -> + [{wait,Fail}]; + _ -> + [{wait_timeout,Fail,Timeout}] + end. + +cg_bs_get(Fail, #cg_set{dst=Dst0,args=[#b_literal{val=Type}|Ss0]}=Set, St) -> + Op = case Type of + integer -> bs_get_integer2; + float -> bs_get_float2; + binary -> bs_get_binary2; + utf8 -> bs_get_utf8; + utf16 -> bs_get_utf16; + utf32 -> bs_get_utf32 + end, + [Dst|Ss1] = beam_args([Dst0|Ss0], St), + Ss = case Ss1 of + [Ctx,{literal,Flags},Size,{integer,Unit}] -> + %% Plain integer/float/binary. + [Ctx,Size,Unit,field_flags(Flags, Set)]; + [Ctx,{literal,Flags}] -> + %% Utf8/16/32. + [Ctx,field_flags(Flags, Set)] + end, + Live = get_live(Set), + [{test,Op,Fail,Live,Ss,Dst}]. + +cg_bs_skip(Fail, [{atom,Type}|Ss0], Set) -> + Op = case Type of + utf8 -> bs_skip_utf8; + utf16 -> bs_skip_utf16; + utf32 -> bs_skip_utf32; + _ -> bs_skip_bits2 + end, + Live = get_live(Set), + Ss = case Ss0 of + [Ctx,{literal,Flags},Size,{integer,Unit}] -> + %% Plain integer/float/binary. + [Ctx,Size,Unit,field_flags(Flags, Set)]; + [Ctx,{literal,Flags}] -> + %% Utf8/16/32. + [Ctx,Live,field_flags(Flags, Set)] + end, + case {Type,Ss} of + {binary,[_,{atom,all},1,_]} -> + []; + {binary,[R,{atom,all},U,_]} -> + [{test,bs_test_unit,Fail,[R,U]}]; + {_,_} -> + [{test,Op,Fail,Ss}] + end. + +field_flags(Flags, #cg_set{anno=#{location:={File,Line}}}) -> + {field_flags,[{anno,[Line,{file,File}]}|Flags]}; +field_flags(Flags, _) -> + {field_flags,Flags}. + +cg_bs_put(Fail, [{atom,Type},{literal,Flags}|Args]) -> + Op = case Type of + integer -> bs_put_integer; + float -> bs_put_float; + binary -> bs_put_binary; + utf8 -> bs_put_utf8; + utf16 -> bs_put_utf16; + utf32 -> bs_put_utf32 + end, + case Args of + [Src,Size,{integer,Unit}] -> + [{Op,Fail,Size,Unit,{field_flags,Flags},Src}]; + [Src] -> + [{Op,Fail,{field_flags,Flags},Src}] + end. + +cg_bs_init(Dst, Size0, Unit, Live, Fail) -> + Op = case Unit of + 1 -> bs_init_bits; + 8 -> bs_init2 + end, + Size = cg_bs_init_size(Size0), + [{Op,Fail,Size,0,Live,{field_flags,[]},Dst}]. + +cg_bs_init_size({x,_}=R) -> R; +cg_bs_init_size({y,_}=R) -> R; +cg_bs_init_size({integer,Int}) -> Int. + +cg_catch(Agg, T0, Context, St0) -> + {Moves,T1} = cg_extract(T0, Agg, St0), + {T,St} = cg_block(T1, Context, St0), + {Moves++T,St}. + +cg_try(Agg, Tag, T0, Context, St0) -> + {Moves0,T1} = cg_extract(T0, Agg, St0), + Moves = order_moves(Moves0, {x,3}), + [#cg_set{op=kill_try_tag}|T2] = T1, + {T,St} = cg_block(T2, Context, St0), + {[{try_case,Tag}|Moves++T],St}. + +cg_extract([#cg_set{op=extract,dst=Dst0,args=Args0}|Is0], Agg, St) -> + [Dst,Agg,{integer,X}] = beam_args([Dst0|Args0], St), + {Ds,Is} = cg_extract(Is0, Agg, St), + case keymember(Dst, 3, Ds) of + true -> + %% This destination will be overwritten. + {Ds,Is}; + false -> + {copy({x,X}, Dst)++Ds,Is} + end; +cg_extract(Is, _, _) -> + {[],Is}. + +copy(Src, Src) -> []; +copy(Src, Dst) -> [{move,Src,Dst}]. + +force_reg({literal,_}=Lit, Reg) -> + {Reg,[{move,Lit,Reg}]}; +force_reg({Kind,_}=R, _) when Kind =:= x; Kind =:= y -> + {R,[]}. + +%% successors(Terminator) -> [Successor]. +%% Return an ordset of all successors for the given terminator. + +successors(#cg_br{succ=Succ,fail=Fail}) -> + ordsets:from_list([Succ,Fail]); +successors(#cg_switch{fail=Fail,list=List}) -> + ordsets:from_list([Fail|[Lbl || {_,Lbl} <- List]]); +successors(#cg_ret{}) -> []. + +%% linearize(Blocks) -> [{BlockLabel,#cg_blk{}}]. +%% Linearize the intermediate representation of the code. Also +%% translate blocks from the SSA records to internal record types +%% used only in this module. + +linearize(Blocks) -> + Linear = beam_ssa:linearize(Blocks), + linearize_1(Linear, Blocks). + +linearize_1([{?BADARG_BLOCK,_}|Ls], Blocks) -> + linearize_1(Ls, Blocks); +linearize_1([{L,Block0}|Ls], Blocks) -> + Block = translate_block(L, Block0, Blocks), + [{L,Block}|linearize_1(Ls, Blocks)]; +linearize_1([], _Blocks) -> []. + +%% translate_block(BlockLabel, #b_blk{}, Blocks) -> #cg_blk{}. +%% Translate a block to the internal records used in this module. +%% Also eliminate phi nodes, replacing them with 'copy' instructions +%% in the predecessor blocks. + +translate_block(L, #b_blk{anno=Anno,is=Is0,last=Last0}, Blocks) -> + Last = translate_terminator(Last0), + PhiCopies = translate_phis(L, Last, Blocks), + Is1 = translate_is(Is0, PhiCopies), + Is = case Anno of + #{frame_size:=Size} -> + Alloc = #cg_alloc{stack=Size}, + [Alloc|Is1]; + #{} -> Is1 + end, + #cg_blk{anno=Anno,is=Is,last=Last}. + +translate_is([#b_set{op=phi}|Is], Tail) -> + translate_is(Is, Tail); +translate_is([#b_set{anno=Anno0,op=Op,dst=Dst,args=Args}=I|Is], Tail) -> + Anno = case beam_ssa:clobbers_xregs(I) of + true -> Anno0#{clobbers=>true}; + false -> Anno0 + end, + [#cg_set{anno=Anno,op=Op,dst=Dst,args=Args}|translate_is(Is, Tail)]; +translate_is([], Tail) -> Tail. + +translate_terminator(#b_ret{anno=Anno,arg=Arg}) -> + Dealloc = case Anno of + #{deallocate:=N} -> N; + #{} -> none + end, + #cg_ret{arg=Arg,dealloc=Dealloc}; +translate_terminator(#b_br{bool=#b_literal{val=true},succ=Succ}) -> + #cg_br{bool=#b_literal{val=true},succ=Succ,fail=Succ}; +translate_terminator(#b_br{bool=#b_literal{val=false},fail=Fail}) -> + #cg_br{bool=#b_literal{val=true},succ=Fail,fail=Fail}; +translate_terminator(#b_br{bool=Bool,succ=Succ,fail=Fail}) -> + #cg_br{bool=Bool,succ=Succ,fail=Fail}; +translate_terminator(#b_switch{arg=Bool,fail=Fail,list=List}) -> + #cg_switch{arg=Bool,fail=Fail,list=List}. + +translate_phis(L, #cg_br{succ=Target,fail=Target}, Blocks) -> + #b_blk{is=Is} = maps:get(Target, Blocks), + Phis = takewhile(fun(#b_set{op=phi}) -> true; + (#b_set{}) -> false + end, Is), + phi_copies(Phis, L); +translate_phis(_, _, _) -> []. + +phi_copies([#b_set{dst=Dst,args=PhiArgs}|Sets], L) -> + CopyArgs = [V || {V,Target} <- PhiArgs, Target =:= L], + [#cg_set{op=copy,dst=Dst,args=CopyArgs}|phi_copies(Sets, L)]; +phi_copies([], _) -> []. + +%% setup_args(Args, Anno, Context) -> [Instruction]. +%% setup_args(Args) -> [Instruction]. +%% Set up X registers for a call. + +setup_args(Args, Anno, none, St) -> + setup_args(Args) ++ kill_yregs(Anno, St); +setup_args(Args, _, _, _) -> + setup_args(Args). + +setup_args([]) -> + []; +setup_args([_|_]=Args) -> + Moves = gen_moves(Args, 0, []), + Scratch = {x,1+last(sort([length(Args)-1|[X || {x,X} <- Args]]))}, + order_moves(Moves, Scratch). + +%% kill_yregs(Anno, #cg{}) -> [{kill,{y,Y}}]. +%% Kill Y registers that will not be used again. + +kill_yregs(#{kill_yregs:=Kill}, #cg{regs=Regs}) -> + ordsets:from_list([{kill,maps:get(V, Regs)} || V <- Kill]); +kill_yregs(#{}, #cg{}) -> []. + +%% gen_moves(As, I, Acc) +%% Generate the basic move instruction to move the arguments +%% to their proper registers. The list will be sorted on +%% destinations. (I.e. the move to {x,0} will be first -- +%% see the comment to order_moves/2.) + +gen_moves([A|As], I, Acc) -> + gen_moves(As, I+1, copy(A, {x,I}) ++ Acc); +gen_moves([], _, Acc) -> + keysort(3, Acc). + +%% order_moves([Move], ScratchReg) -> [Move] +%% Orders move instruction so that source registers are not +%% destroyed before they are used. If there are cycles +%% (such as {move,{x,0},{x,1}}, {move,{x,1},{x,1}}), +%% the scratch register is used to break up the cycle. +%% If possible, the first move of the input list is placed +%% last in the result list (to make the move to {x,0} occur +%% just before the call to allow the Beam loader to coalesce +%% the instructions). + +order_moves(Ms, Scr) -> order_moves(Ms, Scr, []). + +order_moves([{move,_,_}=M|Ms0], ScrReg, Acc0) -> + {Chain,Ms} = collect_chain(Ms0, [M], ScrReg), + Acc = reverse(Chain, Acc0), + order_moves(Ms, ScrReg, Acc); +order_moves([], _, Acc) -> Acc. + +collect_chain(Ms, Path, ScrReg) -> + collect_chain(Ms, Path, [], ScrReg). + +collect_chain([{move,Src,Same}=M|Ms0], [{move,Same,_}|_]=Path, Others, ScrReg) -> + case keymember(Src, 3, Path) of + false -> + collect_chain(reverse(Others, Ms0), [M|Path], [], ScrReg); + true -> + %% There is a cycle, which we must break up. + {break_up_cycle(M, Path, ScrReg),reverse(Others, Ms0)} + end; +collect_chain([M|Ms], Path, Others, ScrReg) -> + collect_chain(Ms, Path, [M|Others], ScrReg); +collect_chain([], Path, Others, _) -> + {Path,Others}. + +break_up_cycle({move,Src,_}=M, Path, ScrReg) -> + [{move,ScrReg,Src},M|break_up_cycle1(Src, Path, ScrReg)]. + +break_up_cycle1(Dst, [{move,Src,Dst}|Path], ScrReg) -> + [{move,Src,ScrReg}|Path]; +break_up_cycle1(Dst, [M|Path], LastMove) -> + [M|break_up_cycle1(Dst, Path, LastMove)]. + +%%% +%%% General utility functions. +%%% + +verify_beam_register({x,_}=Reg) -> Reg. + +is_beam_register({x,_}) -> true; +is_beam_register(_) -> false. + +get_register(V, Regs) -> + case is_beam_register(V) of + true -> V; + false -> maps:get(V, Regs) + end. + +beam_args(As, St) -> + [beam_arg(A, St) || A <- As]. + +beam_arg(#b_var{name=Name}, #cg{regs=Regs}) -> + maps:get(Name, Regs); +beam_arg(#b_literal{val=Val}, _) -> + if + is_atom(Val) -> {atom,Val}; + is_float(Val) -> {float,Val}; + is_integer(Val) -> {integer,Val}; + Val =:= [] -> nil; + true -> {literal,Val} + end; +beam_arg(Reg, _) -> + verify_beam_register(Reg). + +new_block_label(L, St0) -> + {_Lbl,St} = label_for_block(L, St0), + St. + +def_block_label(L, #cg{labels=Labels,used_labels=Used}) -> + Lbl = maps:get(L, Labels), + case gb_sets:is_member(Lbl, Used) of + false -> []; + true -> [{label,Lbl}] + end. + +use_block_labels(Ls, St) -> + mapfoldl(fun use_block_label/2, St, Ls). + +use_block_label(L, #cg{used_labels=Used,catches=Catches}=St0) -> + {Lbl,St} = label_for_block(L, St0), + case gb_sets:is_member(L, Catches) of + true -> + {{catch_tag,{f,Lbl}}, + St#cg{used_labels=gb_sets:add(Lbl, Used)}}; + false -> + {{f,Lbl},St#cg{used_labels=gb_sets:add(Lbl, Used)}} + end. + +label_for_block(L, #cg{labels=Labels0}=St0) -> + case Labels0 of + #{L:=Lbl} -> + {Lbl,St0}; + #{} -> + {Lbl,St} = new_label(St0), + Labels = Labels0#{L=>Lbl}, + {Lbl,St#cg{labels=Labels}} + end. + +%% local_func_label(Name, Arity, State) -> {Label,State'} +%% local_func_label({Name,Arity}, State) -> {Label,State'} +%% Get the function entry label for a local function. + +local_func_label(Name, Arity, St) -> + local_func_label({Name,Arity}, St). + +local_func_label(Key, #cg{functable=Map}=St0) -> + case Map of + #{Key := Label} -> + {Label,St0}; + _ -> + {Label,St} = new_label(St0), + {Label,St#cg{functable=Map#{Key => Label}}} + end. + +%% is_gc_bif(Name, Args) -> true|false. +%% Determines whether the BIF Name/Arity might do a GC. + +-spec is_gc_bif(atom(), [beam_ssa:value()]) -> boolean(). + +is_gc_bif(hd, [_]) -> false; +is_gc_bif(tl, [_]) -> false; +is_gc_bif(self, []) -> false; +is_gc_bif(node, []) -> false; +is_gc_bif(node, [_]) -> false; +is_gc_bif(element, [_,_]) -> false; +is_gc_bif(get, [_]) -> false; +is_gc_bif(is_map_key, [_,_]) -> false; +is_gc_bif(map_get, [_,_]) -> false; +is_gc_bif(tuple_size, [_]) -> false; +is_gc_bif(Bif, Args) -> + Arity = length(Args), + not (erl_internal:bool_op(Bif, Arity) orelse + erl_internal:new_type_test(Bif, Arity) orelse + erl_internal:comp_op(Bif, Arity)). + +%% new_label(St) -> {L,St}. + +new_label(#cg{lcount=Next}=St) -> + {Next,St#cg{lcount=Next+1}}. + +%% call_line(tail|body, Func, Anno) -> [] | [{line,...}]. +%% Produce a line instruction if it will be needed by the +%% call to Func. + +call_line(_Context, {extfunc,Mod,Name,Arity}, Anno) -> + case erl_bifs:is_safe(Mod, Name, Arity) of + false -> + %% The call could be to a BIF. + %% We'll need a line instruction in case the + %% BIF call fails. + [line(Anno)]; + true -> + %% Call to a safe BIF. Since it cannot fail, + %% we don't need any line instruction here. + [] + end; +call_line(body, _, Anno) -> + [line(Anno)]; +call_line(tail, local, _) -> + %% Tail-recursive call to a local function. A line + %% instruction will not be useful. + []; +call_line(tail, _, Anno) -> + %% Call to a fun. + [line(Anno)]. + +%% line(Le) -> {line,[] | {location,File,Line}} +%% Create a line instruction, containing information about +%% the current filename and line number. A line information +%% instruction should be placed before any operation that could +%% cause an exception. + +line(#{location:={File,Line}}) -> + {line,[{location,File,Line}]}; +line(#{}) -> + {line,[]}. + +flatmapfoldl(F, Accu0, [Hd|Tail]) -> + {R,Accu1} = F(Hd, Accu0), + {Rs,Accu2} = flatmapfoldl(F, Accu1, Tail), + {R++Rs,Accu2}; +flatmapfoldl(_, Accu, []) -> {[],Accu}. diff --git a/lib/compiler/src/beam_ssa_lint.erl b/lib/compiler/src/beam_ssa_lint.erl new file mode 100644 index 0000000000..a003607dab --- /dev/null +++ b/lib/compiler/src/beam_ssa_lint.erl @@ -0,0 +1,349 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Purpose: Internal consistency checks for the beam_ssa format. + +-module(beam_ssa_lint). + +-export([module/2, format_error/1]). + +-import(lists, [append/1, foldl/3, foreach/2]). + +-include("beam_ssa.hrl"). + +-spec module(#b_module{}, [compile:option()]) -> + {'ok',#b_module{}} | {'error',list()}. +module(#b_module{body=Fs,name=Name}=Mod0, _Options) -> + Es0 = append([validate_function(F) || F <- Fs]), + case [{?MODULE,E} || E <- Es0] of + [] -> + {ok, Mod0}; + [_|_]=Es -> + {error,[{atom_to_list(Name), Es}]} + end. + +-spec format_error(term()) -> iolist(). +format_error({{_M,F,A},{redefined_variable, Name, Old, I}}) -> + io_lib:format("~p/~p: Variable ~ts (~ts) redefined by ~ts", + [F, A, format_var(Name), format_instr(Old), format_instr(I)]); +format_error({{_M,F,A},{missing_phi_paths, Paths, I}}) -> + io_lib:format("~p/~p: Phi node ~ts doesn't define a value for these " + "branches: ~w", + [F, A, format_instr(I), Paths]); +format_error({{_M,F,A},{garbage_phi_paths, Paths, I}}) -> + io_lib:format("~p/~p: Phi node ~ts defines a value for these unreachable " + "or non-existent branches: ~w", + [F, A, format_instr(I), Paths]); +format_error({{_M,F,A},{unknown_phi_variable, Name, {From, _To}, I}}) -> + io_lib:format("~p/~p: Variable ~ts used in phi node ~ts is undefined on " + "branch ~w", + [F, A, format_var(Name), format_instr(I), From]); +format_error({{_M,F,A},{unknown_block, Label, I}}) -> + io_lib:format("~p/~p: Unknown block ~p referenced in ~ts", + [F, A, Label, I]); +format_error({{_M,F,A},{unknown_variable, Name, I}}) -> + io_lib:format("~p/~p: Unbound variable ~ts used in ~ts", + [F, A, format_var(Name), format_instr(I)]); +format_error({{_M,F,A},{phi_inside_block, Name, Id}}) -> + io_lib:format("~p/~p: Phi node defining ~ts is not at start of block ~p", + [F, A, format_var(Name), Id]); +format_error({{_M,F,A},{undefined_label_in_phi, Label, I}}) -> + io_lib:format("~p/~p: Unknown block label ~p in phi node ~ts", + [F, A, Label, format_instr(I)]). + +format_instr(I) -> + [$',beam_ssa_pp:format_instr(I),$']. + +format_var(V) -> + beam_ssa_pp:format_var(#b_var{name=V}). + +validate_function(F) -> + try + validate_variables(F), + [] + catch + throw:Reason -> + #{func_info:=MFA} = F#b_function.anno, + [{MFA,Reason}]; + Class:Error:Stack -> + io:fwrite("Function: ~p\n", [F#b_function.anno]), + erlang:raise(Class, Error, Stack) + end. + +-type defined_vars() :: gb_sets:set(beam_ssa:var_name()). + +-record(vvars, + {blocks :: #{ beam_ssa:label() => beam_ssa:b_blk() }, + branch_def_vars :: #{ + %% Describes the variable state at the time of this exact branch (phi + %% node validation). + {From :: beam_ssa:label(), To :: beam_ssa:label()} => defined_vars(), + %% Describes the variable state common to all branches leading to this + %% label (un/redefined variable validation). + beam_ssa:label() => defined_vars() }, + defined_vars :: defined_vars()}). + +-spec validate_variables(beam_ssa:b_function()) -> ok. +validate_variables(#b_function{ args = Args, bs = Blocks }) -> + %% Prefill the mapping with function arguments. + ArgNames = vvars_get_varnames(Args), + DefVars = gb_sets:from_list(ArgNames), + Entry = 0, + + State = #vvars{blocks = Blocks, + branch_def_vars = #{ Entry => DefVars }, + defined_vars = DefVars}, + ok = vvars_assert_unique(Blocks, ArgNames), + vvars_phi_nodes(vvars_block(Entry, State)). + +%% Checks the uniqueness of all variables across all blocks. +-spec vvars_assert_unique(Blocks, [beam_ssa:var_name()]) -> ok when + Blocks :: #{ beam_ssa:label() => beam_ssa:b_blk() }. +vvars_assert_unique(Blocks, Args) -> + BlockIs = [Is || #b_blk{is=Is} <- maps:values(Blocks)], + Defined0 = maps:from_list([{V,argument} || V <- Args]), + _ = foldl(fun(Is, Defined) -> + vvars_assert_unique_1(Is, Defined) + end, Defined0, BlockIs), + ok. + +-spec vvars_assert_unique_1(Is, Defined) -> ok when + Is :: list(beam_ssa:b_set()), + Defined :: #{ beam_ssa:var_name() => beam_ssa:b_set() }. +vvars_assert_unique_1([#b_set{dst=#b_var{name=DstName}}=I|Is], Defined) -> + case Defined of + #{DstName:=Old} -> throw({redefined_variable, DstName, Old, I}); + _ -> vvars_assert_unique_1(Is, Defined#{DstName=>I}) + end; +vvars_assert_unique_1([], Defined) -> + Defined. + +-spec vvars_phi_nodes(State :: #vvars{}) -> ok. +vvars_phi_nodes(#vvars{ blocks = Blocks }=State) -> + _ = [vvars_phi_nodes_1(Is, Id, State) || + {Id, #b_blk{ is = Is }} <- maps:to_list(Blocks)], + ok. + +-spec vvars_phi_nodes_1(Is, Id, State) -> ok when + Is :: list(beam_ssa:b_set()), + Id :: beam_ssa:label(), + State :: #vvars{}. +vvars_phi_nodes_1([#b_set{ op = phi, args = Phis }=I | Is], Id, State) -> + ok = vvars_assert_phi_paths(Phis, I, Id, State), + ok = vvars_assert_phi_vars(Phis, I, Id, State), + vvars_phi_nodes_1(Is, Id, State); +vvars_phi_nodes_1([_ | Is], Id, _State) -> + case [Dst || #b_set{op=phi,dst=#b_var{name=Dst}} <- Is] of + [Name|_] -> + throw({phi_inside_block, Name, Id}); + [] -> + ok + end; +vvars_phi_nodes_1([], _Id, _State) -> + ok. + +%% Checks whether all paths leading to this phi node are represented, and that +%% it doesn't reference any non-existent paths. +-spec vvars_assert_phi_paths(Phis, I, Id, State) -> ok when + Phis :: list({beam_ssa:argument(), beam_ssa:label()}), + Id :: beam_ssa:label(), + I :: beam_ssa:b_set(), + State :: #vvars{}. +vvars_assert_phi_paths(Phis, I, Id, State) -> + BranchKeys = maps:keys(State#vvars.branch_def_vars), + RequiredPaths = ordsets:from_list([From || {From, To} <- BranchKeys, To =:= Id]), + ProvidedPaths = ordsets:from_list([From || {_Value, From} <- Phis]), + case ordsets:subtract(RequiredPaths, ProvidedPaths) of + [_|_]=MissingPaths -> throw({missing_phi_paths, MissingPaths, I}); + [] -> ok + end. + %% %% The following test is sometimes useful to find missing optimizations. + %% %% It is commented out, though, because it can be triggered by + %% %% by weird but legal code. + %% case ordsets:subtract(ProvidedPaths, RequiredPaths) of + %% [_|_]=GarbagePaths -> throw({garbage_phi_paths, GarbagePaths, I}); + %% [] -> ok + %% end. + +%% Checks whether all variables used in this phi node are defined in the branch +%% they arrived on. +-spec vvars_assert_phi_vars(Phis, I, Id, State) -> ok when + Phis :: list({beam_ssa:argument(), beam_ssa:label()}), + Id :: beam_ssa:label(), + I :: beam_ssa:b_set(), + State :: #vvars{}. +vvars_assert_phi_vars(Phis, I, Id, #vvars{blocks=Blocks, + branch_def_vars=BranchDefVars}) -> + Vars = [{Var, From} || {#b_var{}=Var, From} <- Phis], + foreach(fun({#b_var{name=VarName}, From}) -> + BranchKey = {From, Id}, + case BranchDefVars of + #{BranchKey:=DefVars} -> + case gb_sets:is_member(VarName, DefVars) of + true -> ok; + false -> throw({unknown_variable, VarName, I}) + end; + #{} -> + throw({unknown_phi_variable, VarName, BranchKey, I}) + end + end, Vars), + Labels = [From || {#b_literal{},From} <- Phis], + foreach(fun(Label) -> + case Blocks of + #{Label:=_} -> + ok; + #{} -> + throw({undefined_label_in_phi, Label, I}) + end + end, Labels). + +-spec vvars_block(Id, State) -> #vvars{} when + Id :: beam_ssa:label(), + State :: #vvars{}. +vvars_block(Id, State0) -> + #{ Id := #b_blk{ is = Is, last = Terminator} } = State0#vvars.blocks, + #{ Id := DefVars } = State0#vvars.branch_def_vars, + State = State0#vvars{ defined_vars = DefVars }, + vvars_terminator(Terminator, Id, vvars_block_1(Is, State)). + +-spec vvars_block_1(Blocks, State) -> #vvars{} when + Blocks :: list(beam_ssa:b_blk()), + State :: #vvars{}. +vvars_block_1([], State) -> + State; +vvars_block_1([#b_set{ dst = #b_var{ name = DstName }, op = phi } | Is], State0) -> + %% We don't check phi node arguments at this point since we may not have + %% visited their definition yet. They'll be handled later on in + %% vvars_phi_nodes/1 after all blocks are processed. + vvars_block_1(Is, vvars_save_var(DstName, State0)); +vvars_block_1([#b_set{ dst = #b_var{ name = DstName }, args = Args }=I | Is], State0) -> + ok = vvars_assert_args(Args, I, State0), + vvars_block_1(Is, vvars_save_var(DstName, State0)). + +-spec vvars_terminator(Terminator, From, State) -> #vvars{} when + Terminator :: beam_ssa:terminator(), + From :: beam_ssa:label(), + State :: #vvars{}. +vvars_terminator(#b_ret{ arg = Arg }=I, _From, State) -> + ok = vvars_assert_args([Arg], I, State), + State; +vvars_terminator(#b_switch{arg=Arg,fail=Fail,list=Switch}=I, From, State) -> + ok = vvars_assert_args([Arg], I, State), + ok = vvars_assert_args([A || {A,_Lbl} <- Switch], I, State), + Labels = [Fail | [Lbl || {_Arg, Lbl} <- Switch]], + ok = vvars_assert_labels(Labels, I, State), + vvars_terminator_1(Labels, From, State); +vvars_terminator(#b_br{bool=#b_literal{val=true},succ=Succ}=I, From, State) -> + Labels = [Succ], + ok = vvars_assert_labels(Labels, I, State), + vvars_terminator_1(Labels, From, State); +vvars_terminator(#b_br{bool=#b_literal{val=false},fail=Fail}=I, From, State) -> + Labels = [Fail], + ok = vvars_assert_labels(Labels, I, State), + vvars_terminator_1(Labels, From, State); +vvars_terminator(#b_br{ bool = Arg, succ = Succ, fail = Fail }=I, From, State) -> + ok = vvars_assert_args([Arg], I, State), + Labels = [Fail, Succ], + ok = vvars_assert_labels(Labels, I, State), + vvars_terminator_1(Labels, From, State). + +-spec vvars_terminator_1(Labels, From, State) -> #vvars{} when + Labels :: list(beam_ssa:label()), + From :: beam_ssa:label(), + State :: #vvars{}. +vvars_terminator_1(Labels0, From, State0) -> + %% Filter out all branches that have already been taken. This should result + %% in either all of Labels0 or an empty list. + Labels = [To || To <- Labels0, + not maps:is_key({From, To}, State0#vvars.branch_def_vars)], + true = Labels =:= Labels0 orelse Labels =:= [], %Assertion + State1 = foldl(fun(To, State) -> + vvars_save_branch(From, To, State) + end, State0, Labels), + foldl(fun(To, State) -> + vvars_block(To, State) + end, State1, Labels). + +%% Gets all variable names in args, ignoring literals etc +-spec vvars_get_varnames(Args) -> list(beam_ssa:var_name()) when + Args :: list(beam_ssa:argument()). +vvars_get_varnames(Args) -> + [Name || #b_var{ name = Name } <- Args]. + +%% Checks that all variables in Args are defined in all paths leading to the +%% current State. +-spec vvars_assert_args(Args, I, State) -> ok when + Args :: list(beam_ssa:argument()), + I :: beam_ssa:terminator() | beam_ssa:b_set(), + State :: #vvars{}. +vvars_assert_args(Args, I, #vvars{defined_vars=DefVars}=State) -> + foreach(fun(#b_remote{mod=Mod,name=Name}) -> + vvars_assert_args([Mod,Name], I, State); + (#b_var{name=Name}) -> + case gb_sets:is_member(Name, DefVars) of + true -> ok; + false -> throw({unknown_variable,Name,I}) + end; + (_) -> ok + end, Args). + +%% Checks that all given labels are defined in State. +-spec vvars_assert_labels(Labels, I, State) -> ok when + Labels :: list(beam_ssa:label()), + I :: beam_ssa:terminator(), + State :: #vvars{}. +vvars_assert_labels(Labels, I, #vvars{blocks=Blocks}) -> + foreach(fun(Label) -> + case maps:is_key(Label, Blocks) of + false -> throw({unknown_block, Label, I}); + true -> ok + end + end, Labels). + +-spec vvars_save_branch(From, To, State) -> #vvars{} when + From :: beam_ssa:label(), + To :: beam_ssa:label(), + State :: #vvars{}. +vvars_save_branch(From, To, State) -> + DefVars = State#vvars.defined_vars, + Branches0 = State#vvars.branch_def_vars, + case Branches0 of + #{ To := LblDefVars } -> + MergedVars = vvars_merge_branches(DefVars, LblDefVars), + + Branches = Branches0#{ To => MergedVars, {From, To} => DefVars }, + State#vvars { branch_def_vars = Branches }; + _ -> + Branches = Branches0#{ To => DefVars, {From, To} => DefVars }, + State#vvars { branch_def_vars = Branches } + end. + +-spec vvars_merge_branches(New, Existing) -> defined_vars() when + New :: defined_vars(), + Existing :: defined_vars(). +vvars_merge_branches(New, Existing) -> + gb_sets:intersection(New, Existing). + +-spec vvars_save_var(VarName, State) -> #vvars{} when + VarName :: beam_ssa:var_name(), + State :: #vvars{}. +vvars_save_var(VarName, State0) -> + %% vvars_assert_unique guarantees that variables are never set twice. + DefVars = gb_sets:insert(VarName, State0#vvars.defined_vars), + State0#vvars{ defined_vars = DefVars }. diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl new file mode 100644 index 0000000000..da466a3316 --- /dev/null +++ b/lib/compiler/src/beam_ssa_opt.erl @@ -0,0 +1,1307 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(beam_ssa_opt). +-export([module/2]). + +-include("beam_ssa.hrl"). +-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2, + splitwith/2,takewhile/2,unzip/1]). + +-spec module(beam_ssa:b_module(), [compile:option()]) -> + {'ok',beam_ssa:b_module()}. + +module(#b_module{body=Fs0}=Module, Opts) -> + Ps = passes(Opts), + Fs = functions(Fs0, Ps), + {ok,Module#b_module{body=Fs}}. + +functions([F|Fs], Ps) -> + [function(F, Ps)|functions(Fs, Ps)]; +functions([], _Ps) -> []. + +-type b_blk() :: beam_ssa:b_blk(). +-type b_var() :: beam_ssa:b_var(). +-type label() :: beam_ssa:label(). + +-record(st, {ssa :: beam_ssa:block_map() | [{label(),b_blk()}], + args :: [b_var()], + cnt :: label()}). +-define(PASS(N), {N,fun N/1}). + +passes(Opts0) -> + Ps = [?PASS(ssa_opt_split_blocks), + ?PASS(ssa_opt_element), + ?PASS(ssa_opt_linearize), + ?PASS(ssa_opt_record), + ?PASS(ssa_opt_cse), + ?PASS(ssa_opt_type), + ?PASS(ssa_opt_float), + ?PASS(ssa_opt_live), + ?PASS(ssa_opt_bsm), + ?PASS(ssa_opt_bsm_shortcut), + ?PASS(ssa_opt_misc), + ?PASS(ssa_opt_blockify), + ?PASS(ssa_opt_sink), + ?PASS(ssa_opt_merge_blocks)], + Negations = [{list_to_atom("no_"++atom_to_list(N)),N} || + {N,_} <- Ps], + Opts = proplists:substitute_negations(Negations, Opts0), + [case proplists:get_value(Name, Opts, true) of + true -> + P; + false -> + {NoName,Name} = keyfind(Name, 2, Negations), + {NoName,fun(S) -> S end} + end || {Name,_}=P <- Ps]. + +function(#b_function{anno=Anno,bs=Blocks0,args=Args,cnt=Count0}=F, Ps) -> + try + St = #st{ssa=Blocks0,args=Args,cnt=Count0}, + #st{ssa=Blocks,cnt=Count} = compile:run_sub_passes(Ps, St), + F#b_function{bs=Blocks,cnt=Count} + catch + Class:Error:Stack -> + #{func_info:={_,Name,Arity}} = Anno, + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +%%% +%%% Trivial sub passes. +%%% + +ssa_opt_linearize(#st{ssa=Blocks}=St) -> + St#st{ssa=beam_ssa:linearize(Blocks)}. + +ssa_opt_type(#st{ssa=Linear,args=Args}=St) -> + St#st{ssa=beam_ssa_type:opt(Linear, Args)}. + +ssa_opt_blockify(#st{ssa=Linear}=St) -> + St#st{ssa=maps:from_list(Linear)}. + +%%% +%%% Split blocks before certain instructions to enable more optimizations. +%%% +%%% Splitting before element/2 enables the optimization that swaps +%%% element/2 instructions. +%%% +%%% Splitting before call and make_fun instructions gives more opportunities +%%% for sinking get_tuple_element instructions. +%%% + +ssa_opt_split_blocks(#st{ssa=Blocks0,cnt=Count0}=St) -> + P = fun(#b_set{op={bif,element}}) -> true; + (#b_set{op=call}) -> true; + (#b_set{op=make_fun}) -> true; + (_) -> false + end, + {Blocks,Count} = beam_ssa:split_blocks(P, Blocks0, Count0), + St#st{ssa=Blocks,cnt=Count}. + +%%% +%%% Order element/2 calls. +%%% +%%% Order an unbroken chain of element/2 calls for the same tuple +%%% with the same failure label so that the highest element is +%%% retrieved first. That will allow the other element/2 calls to +%%% be replaced with get_tuple_element/3 instructions. +%%% + +ssa_opt_element(#st{ssa=Blocks}=St) -> + %% Collect the information about element instructions in this + %% function. + GetEls = collect_element_calls(beam_ssa:linearize(Blocks)), + + %% Collect the element instructions into chains. The + %% element calls in each chain are ordered in reverse + %% execution order. + Chains = collect_chains(GetEls, []), + + %% For each chain, swap the first element call with the + %% element call with the highest index. + St#st{ssa=swap_element_calls(Chains, Blocks)}. + +collect_element_calls([{L,#b_blk{is=Is0,last=Last}}|Bs]) -> + case {Is0,Last} of + {[#b_set{op={bif,element},dst=Element, + args=[#b_literal{val=N},#b_var{}=Tuple]}, + #b_set{op=succeeded,dst=Bool,args=[Element]}], + #b_br{bool=Bool,succ=Succ,fail=Fail}} -> + Info = {L,Succ,{Tuple,Fail},N}, + [Info|collect_element_calls(Bs)]; + {_,_} -> + collect_element_calls(Bs) + end; +collect_element_calls([]) -> []. + +collect_chains([{This,_,V,_}=El|Els], [{_,This,V,_}|_]=Chain) -> + %% Add to the previous chain. + collect_chains(Els, [El|Chain]); +collect_chains([El|Els], [_,_|_]=Chain) -> + %% Save the previous chain and start a new chain. + [Chain|collect_chains(Els, [El])]; +collect_chains([El|Els], _Chain) -> + %% The previous chain is too short; discard it and start a new. + collect_chains(Els, [El]); +collect_chains([], [_,_|_]=Chain) -> + %% Save the last chain. + [Chain]; +collect_chains([], _) -> []. + +swap_element_calls([[{L,_,_,N}|_]=Chain|Chains], Blocks0) -> + Blocks = swap_element_calls_1(Chain, {N,L}, Blocks0), + swap_element_calls(Chains, Blocks); +swap_element_calls([], Blocks) -> Blocks. + +swap_element_calls_1([{L1,_,_,N1}], {N2,L2}, Blocks) when N2 > N1 -> + %% We have reached the end of the chain, and the first + %% element instrution to be executed. Its index is lower + %% than the maximum index found while traversing the chain, + %% so we will need to swap the instructions. + #{L1:=Blk1,L2:=Blk2} = Blocks, + [#b_set{dst=Dst1}=GetEl1,Succ1] = Blk1#b_blk.is, + [#b_set{dst=Dst2}=GetEl2,Succ2] = Blk2#b_blk.is, + Is1 = [GetEl2,Succ1#b_set{args=[Dst2]}], + Is2 = [GetEl1,Succ2#b_set{args=[Dst1]}], + Blocks#{L1:=Blk1#b_blk{is=Is1},L2:=Blk2#b_blk{is=Is2}}; +swap_element_calls_1([{L,_,_,N1}|Els], {N2,_}, Blocks) when N1 > N2 -> + swap_element_calls_1(Els, {N2,L}, Blocks); +swap_element_calls_1([_|Els], Highest, Blocks) -> + swap_element_calls_1(Els, Highest, Blocks); +swap_element_calls_1([], _, Blocks) -> + %% Nothing to do. The element call with highest index + %% is already the first one to be executed. + Blocks. + +%%% +%%% Record optimization. +%%% +%%% Replace tuple matching with an is_tagged_tuple instruction +%%% when applicable. +%%% + +ssa_opt_record(#st{ssa=Linear}=St) -> + Blocks = maps:from_list(Linear), + St#st{ssa=record_opt(Linear, Blocks)}. + +record_opt([{L,#b_blk{is=Is0,last=Last}=Blk0}|Bs], Blocks) -> + Is = record_opt_is(Is0, Last, Blocks), + Blk = Blk0#b_blk{is=Is}, + [{L,Blk}|record_opt(Bs, Blocks)]; +record_opt([], _Blocks) -> []. + +record_opt_is([#b_set{op={bif,is_tuple},dst=#b_var{name=Bool}, + args=[Tuple]}=Set], + Last, Blocks) -> + case is_tagged_tuple(Tuple, Bool, Last, Blocks) of + {yes,Size,Tag} -> + Args = [Tuple,Size,Tag], + [Set#b_set{op=is_tagged_tuple,args=Args}]; + no -> + [Set] + end; +record_opt_is([I|Is], Last, Blocks) -> + [I|record_opt_is(Is, Last, Blocks)]; +record_opt_is([], _Last, _Blocks) -> []. + +is_tagged_tuple(#b_var{name=Tuple}, Bool, + #b_br{bool=#b_var{name=Bool},succ=Succ,fail=Fail}, + Blocks) -> + SuccBlk = maps:get(Succ, Blocks), + is_tagged_tuple_1(SuccBlk, Tuple, Fail, Blocks); +is_tagged_tuple(_, _, _, _) -> no. + +is_tagged_tuple_1(#b_blk{is=Is,last=Last}, Tuple, Fail, Blocks) -> + case Is of + [#b_set{op={bif,tuple_size},dst=#b_var{name=ArityVar}, + args=[#b_var{name=Tuple}]}, + #b_set{op={bif,'=:='}, + dst=#b_var{name=Bool}, + args=[#b_var{name=ArityVar}, + #b_literal{val=ArityVal}=Arity]}] + when is_integer(ArityVal) -> + case Last of + #b_br{bool=#b_var{name=Bool},succ=Succ,fail=Fail} -> + SuccBlk = maps:get(Succ, Blocks), + case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of + no -> + no; + {yes,Tag} -> + {yes,Arity,Tag} + end; + _ -> + no + end; + _ -> + no + end. + +is_tagged_tuple_2(#b_blk{is=Is, + last=#b_br{bool=#b_var{name=Bool},fail=Fail}}, + Tuple, Fail) -> + is_tagged_tuple_3(Is, Bool, Tuple); +is_tagged_tuple_2(#b_blk{}, _, _) -> no. + +is_tagged_tuple_3([#b_set{op=get_tuple_element, + dst=#b_var{name=TagVar}, + args=[#b_var{name=Tuple},#b_literal{val=0}]}|Is], + Bool, Tuple) -> + is_tagged_tuple_4(Is, Bool, TagVar); +is_tagged_tuple_3([_|Is], Bool, Tuple) -> + is_tagged_tuple_3(Is, Bool, Tuple); +is_tagged_tuple_3([], _, _) -> no. + +is_tagged_tuple_4([#b_set{op={bif,'=:='},dst=#b_var{name=Bool}, + args=[#b_var{name=TagVar}, + #b_literal{val=TagVal}=Tag]}], + Bool, TagVar) when is_atom(TagVal) -> + {yes,Tag}; +is_tagged_tuple_4([_|Is], Bool, TagVar) -> + is_tagged_tuple_4(Is, Bool, TagVar); +is_tagged_tuple_4([], _, _) -> no. + +%%% +%%% Common subexpression elimination (CSE). +%%% +%%% Eliminate repeated evaluation of identical expressions. To avoid +%%% increasing the size of the stack frame, we don't eliminate +%%% subexpressions across instructions that clobber the X registers. +%%% + +ssa_opt_cse(#st{ssa=Linear}=St) -> + M = #{0=>#{}}, + St#st{ssa=cse(Linear, #{}, M)}. + +cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) -> + Es0 = maps:get(L, M0), + {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []), + Last = sub(Last0, Sub), + M = cse_successors(Is1, Blk, Es, M0), + Is = reverse(Is1), + [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)]; +cse([], _, _) -> []. + +cse_successors([#b_set{op=succeeded,args=[Src]},Bif|_], Blk, EsSucc, M0) -> + case cse_suitable(Bif) of + true -> + %% The previous instruction only has a valid value at the success branch. + %% We must remove the substitution for Src from the failure branch. + #b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk, + M = cse_successors_1([Succ], EsSucc, M0), + EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc), + cse_successors_1([Fail], EsFail, M); + false -> + %% There can't be any replacement for Src in EsSucc. No need for + %% any special handling. + cse_successors_1(beam_ssa:successors(Blk), EsSucc, M0) + end; +cse_successors(_Is, Blk, Es, M) -> + cse_successors_1(beam_ssa:successors(Blk), Es, M). + +cse_successors_1([L|Ls], Es0, M) -> + case M of + #{L:=Es1} -> + Es = maps:filter(fun(Key, Value) -> + case Es1 of + #{Key:=Value} -> true; + #{} -> false + end + end, Es0), + cse_successors_1(Ls, Es0, M#{L:=Es}); + #{} -> + cse_successors_1(Ls, Es0, M#{L=>Es0}) + end; +cse_successors_1([], _, M) -> M. + +cse_is([#b_set{op=succeeded,dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) -> + I = sub(I0, Sub0), + case I of + #b_set{args=[Src]} -> + cse_is(Is, Es, Sub0, [I|Acc]); + #b_set{} -> + %% The previous instruction has been eliminated. Eliminate the + %% 'succeeded' instruction too. + Sub = Sub0#{Bool=>#b_literal{val=true}}, + cse_is(Is, Es, Sub, Acc) + end; +cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) -> + I = sub(I0, Sub0), + case beam_ssa:clobbers_xregs(I) of + true -> + %% Retaining the expressions map across calls and other + %% clobbering instructions would work, but it would cause + %% the common subexpressions to be saved to Y registers, + %% which would probably increase the size of the stack + %% frame. + cse_is(Is, #{}, Sub0, [I|Acc]); + false -> + case cse_expr(I) of + none -> + %% Not suitable for CSE. + cse_is(Is, Es0, Sub0, [I|Acc]); + {ok,ExprKey} -> + case Es0 of + #{ExprKey:=Src} -> + Sub = Sub0#{Dst=>Src}, + cse_is(Is, Es0, Sub, Acc); + #{} -> + Es = Es0#{ExprKey=>Dst}, + cse_is(Is, Es, Sub0, [I|Acc]) + end + end + end; +cse_is([], Es, Sub, Acc) -> + {Acc,Es,Sub}. + +cse_expr(#b_set{op=Op,args=Args}=I) -> + case cse_suitable(I) of + true -> {ok,{Op,Args}}; + false -> none + end. + +cse_suitable(#b_set{op=get_hd}) -> true; +cse_suitable(#b_set{op=get_tl}) -> true; +cse_suitable(#b_set{op=put_list}) -> true; +cse_suitable(#b_set{op=put_tuple}) -> true; +cse_suitable(#b_set{op={bif,Name},args=Args}) -> + %% Doing CSE for comparison operators would prevent + %% creation of 'test' instructions. + Arity = length(Args), + not (erl_internal:new_type_test(Name, Arity) orelse + erl_internal:comp_op(Name, Arity) orelse + erl_internal:bool_op(Name, Arity)); +cse_suitable(#b_set{}) -> false. + +%%% +%%% Using floating point instructions. +%%% +%%% Use the special floating points version of arithmetic +%%% instructions, if the operands are known to be floats or the result +%%% of the operation will be a float. +%%% +%%% The float instructions were never used in guards before, so we +%%% will take special care to keep not using them in guards. Using +%%% them in guards would require a new version of the 'fconv' +%%% instruction that would take a failure label. Since it is unlikely +%%% that using float instructions in guards would be benefical, why +%%% bother implementing a new instruction? Also, implementing float +%%% instructions in guards in HiPE could turn out to be a lot of work. +%%% + +-record(fs, + {s=undefined :: 'undefined' | 'cleared', + regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()}, + fail=none :: 'none' | beam_ssa:label(), + ren=#{} :: #{beam_ssa:label():=beam_ssa:label()}, + non_guards :: gb_sets:set(beam_ssa:label()) + }). + +ssa_opt_float(#st{ssa=Linear0,cnt=Count0}=St) -> + NonGuards0 = float_non_guards(Linear0), + NonGuards = gb_sets:from_list(NonGuards0), + Fs = #fs{non_guards=NonGuards}, + {Linear,Count} = float_opt(Linear0, Count0, Fs), + St#st{ssa=Linear,cnt=Count}. + +float_non_guards([{L,#b_blk{is=Is}}|Bs]) -> + case Is of + [#b_set{op=landingpad}|_] -> + [L|float_non_guards(Bs)]; + _ -> + float_non_guards(Bs) + end; +float_non_guards([]) -> [?BADARG_BLOCK]. + +float_opt([{L,Blk0}|Bs], Count, Fs) -> + Blk = float_rename_phis(Blk0, Fs), + case float_need_flush(Blk, Fs) of + true -> + float_flush(L, Blk, Bs, Count, Fs); + false -> + float_opt_1(L, Blk, Bs, Count, Fs) + end; +float_opt([], Count, _Fs) -> + {[],Count}. + +float_opt_1(L, #b_blk{last=#b_br{fail=F}}=Blk, Bs0, + Count0, #fs{non_guards=NonGuards}=Fs) -> + case gb_sets:is_member(F, NonGuards) of + true -> + %% This block is not inside a guard. + %% We can do the optimization. + float_opt_2(L, Blk, Bs0, Count0, Fs); + false -> + %% This block is inside a guard. Don't do + %% any floating point optimizations. + {Bs,Count} = float_opt(Bs0, Count0, Fs), + {[{L,Blk}|Bs],Count} + end; +float_opt_1(L, Blk, Bs, Count, Fs) -> + float_opt_2(L, Blk, Bs, Count, Fs). + +float_opt_2(L, #b_blk{is=Is0}=Blk0, Bs0, Count0, Fs0) -> + case float_opt_is(Is0, Fs0, Count0, []) of + {Is1,Fs1,Count1} -> + Fs = float_fail_label(Blk0, Fs1), + Split = float_split_conv(Is1, Blk0), + {Blks0,Count2} = float_number(Split, L, Count1), + {Blks,Count3} = float_conv(Blks0, Fs#fs.fail, Count2), + {Bs,Count} = float_opt(Bs0, Count3, Fs), + {Blks++Bs,Count}; + none -> + {Bs,Count} = float_opt(Bs0, Count0, Fs0), + {[{L,Blk0}|Bs],Count} + end. + +%% Split {float,convert} instructions into individual blocks. +float_split_conv(Is0, Blk) -> + Br = #b_br{bool=#b_literal{val=true},succ=0,fail=0}, + case splitwith(fun(#b_set{op=Op}) -> + Op =/= {float,convert} + end, Is0) of + {Is,[]} -> + [Blk#b_blk{is=Is}]; + {[_|_]=Is1,[#b_set{op={float,convert}}=Conv|Is2]} -> + [#b_blk{is=Is1,last=Br}, + #b_blk{is=[Conv],last=Br}|float_split_conv(Is2, Blk)]; + {[],[#b_set{op={float,convert}}=Conv|Is1]} -> + [#b_blk{is=[Conv],last=Br}|float_split_conv(Is1, Blk)] + end. + +%% Number the blocks that were split. +float_number([B|Bs0], FirstL, Count0) -> + {Bs,Count} = float_number(Bs0, Count0), + {[{FirstL,B}|Bs],Count}. + +float_number([B|Bs0], Count0) -> + {Bs,Count} = float_number(Bs0, Count0+1), + {[{Count0,B}|Bs],Count}; +float_number([], Count) -> + {[],Count}. + +%% Insert 'succeeded' instructions after each {float,convert} +%% instruction. +float_conv([{L,#b_blk{is=Is0}=Blk0}|Bs0], Fail, Count0) -> + case Is0 of + [#b_set{op={float,convert}}=Conv] -> + {Bool0,Count1} = new_reg('@ssa_bool', Count0), + Bool = #b_var{name=Bool0}, + Succeeded = #b_set{op=succeeded,dst=Bool, + args=[Conv#b_set.dst]}, + Is = [Conv,Succeeded], + [{NextL,_}|_] = Bs0, + Br = #b_br{bool=Bool,succ=NextL,fail=Fail}, + Blk = Blk0#b_blk{is=Is,last=Br}, + {Bs,Count} = float_conv(Bs0, Fail, Count1), + {[{L,Blk}|Bs],Count}; + [_|_] -> + case Bs0 of + [{NextL,_}|_] -> + Br = #b_br{bool=#b_literal{val=true}, + succ=NextL,fail=NextL}, + Blk = Blk0#b_blk{last=Br}, + {Bs,Count} = float_conv(Bs0, Fail, Count0), + {[{L,Blk}|Bs],Count}; + [] -> + {[{L,Blk0}],Count0} + end + end. + +float_need_flush(#b_blk{is=Is}, #fs{s=cleared}) -> + case Is of + [#b_set{anno=#{float_op:=_}}|_] -> + false; + _ -> + true + end; +float_need_flush(_, _) -> false. + +float_opt_is([#b_set{op=succeeded,args=[Src]}=I0], + #fs{regs=Rs}=Fs, Count, Acc) -> + case Rs of + #{Src:=Fr} -> + I = I0#b_set{args=[Fr]}, + {reverse(Acc, [I]),Fs,Count}; + #{} -> + {reverse(Acc, [I0]),Fs,Count} + end; +float_opt_is([#b_set{anno=Anno0}=I0|Is0], Fs0, Count0, Acc) -> + case Anno0 of + #{float_op:=FTypes} -> + Anno = maps:remove(float_op, Anno0), + I1 = I0#b_set{anno=Anno}, + {Is,Fs,Count} = float_make_op(I1, FTypes, Fs0, Count0), + float_opt_is(Is0, Fs, Count, reverse(Is, Acc)); + #{} -> + float_opt_is(Is0, Fs0#fs{regs=#{}}, Count0, [I0|Acc]) + end; +float_opt_is([], Fs, _Count, _Acc) -> + #fs{s=undefined} = Fs, %Assertion. + none. + +float_rename_phis(#b_blk{is=Is}=Blk, #fs{ren=Ren}) -> + if + map_size(Ren) =:= 0 -> + Blk; + true -> + Blk#b_blk{is=float_rename_phis_1(Is, Ren)} + end. + +float_rename_phis_1([#b_set{op=phi,args=Args0}=I|Is], Ren) -> + Args = [float_phi_arg(Arg, Ren) || Arg <- Args0], + [I#b_set{args=Args}|float_rename_phis_1(Is, Ren)]; +float_rename_phis_1(Is, _) -> Is. + +float_phi_arg({Var,OldLbl}, Ren) -> + case Ren of + #{OldLbl:=NewLbl} -> + {Var,NewLbl}; + #{} -> + {Var,OldLbl} + end. + +float_make_op(#b_set{op={bif,Op},dst=Dst,args=As0}=I0, + Ts, #fs{s=S,regs=Rs0}=Fs, Count0) -> + {As1,Rs1,Count1} = float_load(As0, Ts, Rs0, Count0, []), + {As,Is0} = unzip(As1), + {Fr,Count2} = new_reg('@fr', Count1), + FrDst = #b_var{name=Fr}, + I = I0#b_set{op={float,Op},dst=FrDst,args=As}, + Rs = Rs1#{Dst=>FrDst}, + Is = append(Is0) ++ [I], + case S of + undefined -> + {Ignore,Count} = new_reg('@ssa_ignore', Count2), + C = #b_set{op={float,clearerror},dst=#b_var{name=Ignore}}, + {[C|Is],Fs#fs{s=cleared,regs=Rs},Count}; + cleared -> + {Is,Fs#fs{regs=Rs},Count2} + end. + +float_load([A|As], [T|Ts], Rs0, Count0, Acc) -> + {Load,Rs,Count} = float_reg_arg(A, T, Rs0, Count0), + float_load(As, Ts, Rs, Count, [Load|Acc]); +float_load([], [], Rs, Count, Acc) -> + {reverse(Acc),Rs,Count}. + +float_reg_arg(A, T, Rs, Count0) -> + case Rs of + #{A:=Fr} -> + {{Fr,[]},Rs,Count0}; + #{} -> + {Fr,Count} = new_float_copy_reg(Count0), + Dst = #b_var{name=Fr}, + I = float_load_reg(T, A, Dst), + {{Dst,[I]},Rs#{A=>Dst},Count} + end. + +float_load_reg(convert, #b_var{}=Src, Dst) -> + #b_set{op={float,convert},dst=Dst,args=[Src]}; +float_load_reg(convert, #b_literal{val=Val}=Src, Dst) -> + try float(Val) of + F -> + #b_set{op={float,put},dst=Dst,args=[#b_literal{val=F}]} + catch + error:_ -> + %% Let the exception happen at runtime. + #b_set{op={float,convert},dst=Dst,args=[Src]} + end; +float_load_reg(float, Src, Dst) -> + #b_set{op={float,put},dst=Dst,args=[Src]}. + +new_float_copy_reg(Count) -> + new_reg('@fr_copy', Count). + +new_reg(Base, Count) -> + Fr = {Base,Count}, + {Fr,Count+1}. + +float_flush(L, Blk, Bs0, Count0, #fs{s=cleared,fail=Fail,ren=Ren0}=Fs0) -> + {Bool0,Count1} = new_reg('@ssa_bool', Count0), + Bool = #b_var{name=Bool0}, + + %% Insert two blocks before the current block. First allocate + %% block numbers. + FirstL = L, %For checkerror. + MiddleL = Count1, %For flushed float regs. + LastL = Count1 + 1, %For original block. + Count2 = Count1 + 2, + + %% Build the block with the checkerror instruction. + CheckIs = [#b_set{op={float,checkerror},dst=Bool}], + FirstBlk = #b_blk{is=CheckIs,last=#b_br{bool=Bool,succ=MiddleL,fail=Fail}}, + + %% Build the block that flushes all registers. Note that this must be a + %% separate block in case the original block begins with a phi instruction, + %% to avoid embedding a phi instruction in the middle of a block. + FlushIs = float_flush_regs(Fs0), + MiddleBlk = #b_blk{is=FlushIs,last=#b_br{bool=#b_literal{val=true}, + succ=LastL,fail=LastL}}, + + %% The last block is the original unmodified block. + LastBlk = Blk, + + %% Update state and blocks. + Ren = Ren0#{L=>LastL}, + Fs = Fs0#fs{s=undefined,regs=#{},fail=none,ren=Ren}, + Bs1 = [{FirstL,FirstBlk},{MiddleL,MiddleBlk},{LastL,LastBlk}|Bs0], + float_opt(Bs1, Count2, Fs). + +float_fail_label(#b_blk{last=Last}, Fs) -> + case Last of + #b_br{bool=#b_var{},fail=Fail} -> + Fs#fs{fail=Fail}; + _ -> + Fs + end. + +float_flush_regs(#fs{regs=Rs}) -> + maps:fold(fun(_, #b_var{name={'@fr_copy',_}}, Acc) -> + Acc; + (Dst, Fr, Acc) -> + [#b_set{op={float,get},dst=Dst,args=[Fr]}|Acc] + end, [], Rs). + +%%% +%%% Live optimization. +%%% +%%% Optimize instructions whose values are not used. They could be +%%% removed if they have no side effects, or in a few cases replaced +%%% with a cheaper instructions +%%% + +ssa_opt_live(#st{ssa=Linear}=St) -> + St#st{ssa=live_opt(reverse(Linear), #{}, [])}. + +live_opt([{L,Blk0}|Bs], LiveMap0, Acc) -> + Successors = beam_ssa:successors(Blk0), + Live0 = live_opt_succ(Successors, L, LiveMap0), + {Blk,Live} = live_opt_blk(Blk0, Live0), + LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0), + live_opt(Bs, LiveMap, [{L,Blk}|Acc]); +live_opt([], _, Acc) -> Acc. + +live_opt_succ([S|Ss], L, LiveMap) -> + Live0 = live_opt_succ(Ss, L, LiveMap), + Key = {S,L}, + case LiveMap of + #{Key:=Live} -> + gb_sets:union(Live, Live0); + #{S:=Live} -> + gb_sets:union(Live, Live0); + #{} -> + Live0 + end; +live_opt_succ([], _, _) -> + gb_sets:empty(). + +live_opt_phis(Is, L, Live0, LiveMap0) -> + LiveMap = LiveMap0#{L=>Live0}, + Phis = takewhile(fun(#b_set{op=Op}) -> Op =:= phi end, Is), + case Phis of + [] -> + LiveMap; + [_|_] -> + PhiArgs = append([Args || #b_set{args=Args} <- Phis]), + PhiVars = [{P,V} || {#b_var{name=V},P} <- PhiArgs], + PhiLive0 = rel2fam(PhiVars), + PhiLive = [{{L,P},gb_sets:union(gb_sets:from_list(Vs), Live0)} || + {P,Vs} <- PhiLive0], + maps:merge(LiveMap, maps:from_list(PhiLive)) + end. + +live_opt_blk(#b_blk{is=Is0,last=Last}=Blk, Live0) -> + Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(Last))), + {Is,Live} = live_opt_is(reverse(Is0), Live1, []), + {Blk#b_blk{is=Is},Live}. + +live_opt_is([#b_set{op=phi,dst=#b_var{name=Dst}}=I|Is], Live, Acc) -> + case gb_sets:is_member(Dst, Live) of + true -> + live_opt_is(Is, Live, [I|Acc]); + false -> + live_opt_is(Is, Live, Acc) + end; +live_opt_is([#b_set{op=succeeded,dst=#b_var{name=SuccDst}=SuccDstVar, + args=[#b_var{name=Dst}]}=SuccI, + #b_set{dst=#b_var{name=Dst}}=I|Is], Live0, Acc) -> + case gb_sets:is_member(Dst, Live0) of + true -> + case gb_sets:is_member(SuccDst, Live0) of + true -> + Live1 = gb_sets:add(Dst, Live0), + Live = gb_sets:delete_any(SuccDst, Live1), + live_opt_is([I|Is], Live, [SuccI|Acc]); + false -> + live_opt_is([I|Is], Live0, Acc) + end; + false -> + case live_opt_unused(I) of + {replace,NewI0} -> + NewI = NewI0#b_set{dst=SuccDstVar}, + live_opt_is([NewI|Is], Live0, Acc); + keep -> + case gb_sets:is_member(SuccDst, Live0) of + true -> + Live1 = gb_sets:add(Dst, Live0), + Live = gb_sets:delete_any(SuccDst, Live1), + live_opt_is([I|Is], Live, [SuccI|Acc]); + false -> + live_opt_is([I|Is], Live0, Acc) + end + end + end; +live_opt_is([#b_set{op=Op,dst=#b_var{name=Dst}}=I|Is], Live0, Acc) -> + case gb_sets:is_member(Dst, Live0) of + true -> + Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))), + Live = gb_sets:delete_any(Dst, Live1), + live_opt_is(Is, Live, [I|Acc]); + false -> + case is_pure(Op) of + true -> + live_opt_is(Is, Live0, Acc); + false -> + Live = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))), + live_opt_is(Is, Live, [I|Acc]) + end + end; +live_opt_is([], Live, Acc) -> + {Acc,Live}. + +is_pure({bif,_}) -> true; +is_pure({float,get}) -> true; +is_pure(bs_extract) -> true; +is_pure(extract) -> true; +is_pure(get_hd) -> true; +is_pure(get_tl) -> true; +is_pure(get_tuple_element) -> true; +is_pure(is_nonempty_list) -> true; +is_pure(is_tagged_tuple) -> true; +is_pure(put_list) -> true; +is_pure(put_tuple) -> true; +is_pure(_) -> false. + +live_opt_unused(#b_set{op=get_map_element}=Set) -> + {replace,Set#b_set{op=has_map_field}}; +live_opt_unused(_) -> keep. + +%%% +%%% Optimize binary matching instructions. +%%% + +ssa_opt_bsm(#st{ssa=Linear}=St) -> + Extracted0 = bsm_extracted(Linear), + Extracted = cerl_sets:from_list(Extracted0), + St#st{ssa=bsm_skip(Linear, Extracted)}. + +bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs], Extracted) -> + Is = bsm_skip_is(Is0, Extracted), + [{L,Blk#b_blk{is=Is}}|bsm_skip(Bs, Extracted)]; +bsm_skip([], _) -> []. + +bsm_skip_is([I0|Is], Extracted) -> + case I0 of + #b_set{op=bs_match,args=[#b_literal{val=string}|_]} -> + [I0|bsm_skip_is(Is, Extracted)]; + #b_set{op=bs_match,dst=Ctx,args=[Type,PrevCtx|Args0]} -> + I = case cerl_sets:is_element(Ctx, Extracted) of + true -> + I0; + false -> + %% The value is never extracted. + Args = [#b_literal{val=skip},PrevCtx,Type|Args0], + I0#b_set{args=Args} + end, + [I|Is]; + #b_set{} -> + [I0|bsm_skip_is(Is, Extracted)] + end; +bsm_skip_is([], _) -> []. + +bsm_extracted([{_,#b_blk{is=Is}}|Bs]) -> + case Is of + [#b_set{op=bs_extract,args=[Ctx]}|_] -> + [Ctx|bsm_extracted(Bs)]; + _ -> + bsm_extracted(Bs) + end; +bsm_extracted([]) -> []. + +%%% +%%% Short-cutting binary matching instructions. +%%% + +ssa_opt_bsm_shortcut(#st{ssa=Linear}=St) -> + Positions = bsm_positions(Linear, #{}), + case map_size(Positions) of + 0 -> + %% No binary matching instructions. + St; + _ -> + St#st{ssa=bsm_shortcut(Linear, Positions)} + end. + +bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) -> + PosMap = bsm_positions_is(Is, PosMap0), + case {Is,Last} of + {[#b_set{op=bs_test_tail,dst=Bool,args=[Ctx,#b_literal{val=Bits0}]}], + #b_br{bool=Bool,fail=Fail}} -> + Bits = Bits0 + maps:get(Ctx, PosMap0), + bsm_positions(Bs, PosMap#{L=>{Bits,Fail}}); + {_,_} -> + bsm_positions(Bs, PosMap) + end; +bsm_positions([], PosMap) -> PosMap. + +bsm_positions_is([#b_set{op=bs_start_match,dst=New}|Is], PosMap0) -> + PosMap = PosMap0#{New=>0}, + bsm_positions_is(Is, PosMap); +bsm_positions_is([#b_set{op=bs_match,dst=New,args=Args}|Is], PosMap0) -> + [_,Old|_] = Args, + #{Old:=Bits0} = PosMap0, + Bits = bsm_update_bits(Args, Bits0), + PosMap = PosMap0#{New=>Bits}, + bsm_positions_is(Is, PosMap); +bsm_positions_is([_|Is], PosMap) -> + bsm_positions_is(Is, PosMap); +bsm_positions_is([], PosMap) -> PosMap. + +bsm_update_bits([#b_literal{val=string},_,#b_literal{val=String}], Bits) -> + Bits + bit_size(String); +bsm_update_bits([#b_literal{val=utf8}|_], Bits) -> + Bits + 8; +bsm_update_bits([#b_literal{val=utf16}|_], Bits) -> + Bits + 16; +bsm_update_bits([#b_literal{val=utf32}|_], Bits) -> + Bits + 32; +bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits) + when is_integer(Sz) -> + Bits + Sz*U; +bsm_update_bits(_, Bits) -> Bits. + +bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) -> + case {Is,Last0} of + {[#b_set{op=bs_match,dst=New,args=[_,Old|_]}, + #b_set{op=succeeded,dst=Bool,args=[New]}], + #b_br{bool=Bool,fail=Fail}} -> + case PosMap of + #{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits -> + Last = Last0#b_br{fail=NextFail}, + [{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)]; + #{} -> + [{L,Blk}|bsm_shortcut(Bs, PosMap)] + end; + {_,_} -> + [{L,Blk}|bsm_shortcut(Bs, PosMap)] + end; +bsm_shortcut([], _PosMap) -> []. + +%%% +%%% Miscellanous optimizations in execution order. +%%% + +ssa_opt_misc(#st{ssa=Linear}=St) -> + St#st{ssa=misc_opt(Linear, #{})}. + +misc_opt([{L,#b_blk{is=Is0,last=Last0}=Blk0}|Bs], Sub0) -> + {Is,Sub} = misc_opt_is(Is0, Sub0, []), + Last = sub(Last0, Sub), + Blk = Blk0#b_blk{is=Is,last=Last}, + [{L,Blk}|misc_opt(Bs, Sub)]; +misc_opt([], _) -> []. + +misc_opt_is([#b_set{op=phi}=I0|Is], Sub0, Acc) -> + #b_set{dst=Dst,args=Args} = I = sub(I0, Sub0), + case all_same(Args) of + true -> + %% Eliminate the phi node if there is just one source + %% value or if the values are identical. + [{Val,_}|_] = Args, + Sub = Sub0#{Dst=>Val}, + misc_opt_is(Is, Sub, Acc); + false -> + misc_opt_is(Is, Sub0, [I|Acc]) + end; +misc_opt_is([#b_set{}=I0|Is], Sub, Acc) -> + #b_set{op=Op,dst=Dst,args=Args} = I = sub(I0, Sub), + case make_literal(Op, Args) of + #b_literal{}=Literal -> + misc_opt_is(Is, Sub#{Dst=>Literal}, Acc); + error -> + misc_opt_is(Is, Sub, [I|Acc]) + end; +misc_opt_is([], Sub, Acc) -> + {reverse(Acc),Sub}. + +all_same([{H,_}|T]) -> + all(fun({E,_}) -> E =:= H end, T). + +make_literal(put_tuple, Args) -> + case make_literal_list(Args, []) of + error -> + error; + List -> + #b_literal{val=list_to_tuple(List)} + end; +make_literal(put_list, [#b_literal{val=H},#b_literal{val=T}]) -> + #b_literal{val=[H|T]}; +make_literal(_, _) -> error. + +make_literal_list([#b_literal{val=H}|T], Acc) -> + make_literal_list(T, [H|Acc]); +make_literal_list([_|_], _) -> + error; +make_literal_list([], Acc) -> + reverse(Acc). + +%%% +%%% Merge blocks. +%%% + +ssa_opt_merge_blocks(#st{ssa=Blocks}=St) -> + Preds = beam_ssa:predecessors(Blocks), + St#st{ssa=merge_blocks_1(beam_ssa:rpo(Blocks), Preds, Blocks)}. + +merge_blocks_1([L|Ls], Preds0, Blocks0) -> + case Preds0 of + #{L:=[P]} -> + #{P:=Blk0,L:=Blk1} = Blocks0, + case is_merge_allowed(L, Blk0, Blk1) of + true -> + #b_blk{is=Is0} = Blk0, + #b_blk{is=Is1} = Blk1, + Is = Is0 ++ Is1, + Blk = Blk1#b_blk{is=Is}, + Blocks1 = maps:remove(L, Blocks0), + Blocks2 = maps:put(P, Blk, Blocks1), + Successors = beam_ssa:successors(Blk), + Blocks = beam_ssa:update_phi_labels(Successors, L, P, Blocks2), + Preds = merge_update_preds(Successors, L, P, Preds0), + merge_blocks_1(Ls, Preds, Blocks); + false -> + merge_blocks_1(Ls, Preds0, Blocks0) + end; + #{} -> + merge_blocks_1(Ls, Preds0, Blocks0) + end; +merge_blocks_1([], _Preds, Blocks) -> Blocks. + +merge_update_preds([L|Ls], From, To, Preds0) -> + Ps = [rename_label(P, From, To) || P <- maps:get(L, Preds0)], + Preds = maps:put(L, Ps, Preds0), + merge_update_preds(Ls, From, To, Preds); +merge_update_preds([], _, _, Preds) -> Preds. + +rename_label(From, From, To) -> To; +rename_label(Lbl, _, _) -> Lbl. + +is_merge_allowed(_, _, #b_blk{is=[#b_set{op=peek_message}|_]}) -> + false; +is_merge_allowed(L, Blk0, #b_blk{}) -> + case beam_ssa:successors(Blk0) of + [L] -> true; + [_|_] -> false + end. + +%%% +%%% When a tuple is matched, the pattern matching compiler generates a +%%% get_tuple_element instruction for every tuple element that will +%%% ever be used in the rest of the function. That often forces the +%%% extracted tuple elements to be stored in Y registers until it's +%%% time to use them. It could also mean that there could be execution +%%% paths that will never use the extracted elements. +%%% +%%% This optimization will sink get_tuple_element instructions, that +%%% is, move them forward in the execution stream to the last possible +%%% block there they will still dominate all uses. That may reduce the +%%% size of stack frames, reduce register shuffling, and avoid +%%% extracting tuple elements on execution paths that never use the +%%% extracted values. +%%% + +ssa_opt_sink(#st{ssa=Blocks0}=St) -> + Linear = beam_ssa:linearize(Blocks0), + + %% Create a map with all variables that define get_tuple_element + %% instructions. The variable name map to the block it is defined in. + Defs = maps:from_list(def_blocks(Linear)), + + %% Now find all the blocks that use variables defined by get_tuple_element + %% instructions. + Used = used_blocks(Linear, Defs, []), + + %% Calculate dominators. + Dom0 = beam_ssa:dominators(Blocks0), + + %% It is not safe to move get_tuple_element instructions to blocks + %% that begin with certain instructions. It is also unsafe to move + %% the instructions into any part of a receive. To avoid such + %% unsafe moves, pretend that the unsuitable blocks are not + %% dominators. + Unsuitable = unsuitable(Linear, Blocks0), + Dom = case gb_sets:is_empty(Unsuitable) of + true -> + Dom0; + false -> + F = fun(_, DomBy) -> + [L || L <- DomBy, + not gb_sets:is_element(L, Unsuitable)] + end, + maps:map(F, Dom0) + end, + + %% Calculate new positions for get_tuple_element instructions. The new + %% position is a block that dominates all uses of the variable. + DefLoc = new_def_locations(Used, Defs, Dom), + + %% Now move all suitable get_tuple_element instructions to their + %% new blocks. + Blocks = foldl(fun({V,To}, A) -> + From = maps:get(V, Defs), + move_defs(V, From, To, A) + end, Blocks0, DefLoc), + St#st{ssa=Blocks}. + +def_blocks([{L,#b_blk{is=Is}}|Bs]) -> + def_blocks_is(Is, L, def_blocks(Bs)); +def_blocks([]) -> []. + +def_blocks_is([#b_set{op=get_tuple_element,dst=#b_var{name=Dst}}|Is], L, Acc) -> + def_blocks_is(Is, L, [{Dst,L}|Acc]); +def_blocks_is([_|Is], L, Acc) -> + def_blocks_is(Is, L, Acc); +def_blocks_is([], _, Acc) -> Acc. + +used_blocks([{L,Blk}|Bs], Def, Acc0) -> + Used = beam_ssa:used(Blk), + Acc = [{V,L} || V <- Used, maps:is_key(V, Def)] ++ Acc0, + used_blocks(Bs, Def, Acc); +used_blocks([], _Def, Acc) -> + rel2fam(Acc). + +%% unsuitable(Linear, Blocks) -> Unsuitable. +%% Return an ordset of block labels for the blocks that are not +%% suitable for sinking of get_tuple_element instructions. + +unsuitable(Linear, Blocks) -> + Predecessors = beam_ssa:predecessors(Blocks), + Unsuitable0 = unsuitable_1(Linear), + Unsuitable1 = unsuitable_recv(Linear, Blocks, Predecessors), + gb_sets:from_list(Unsuitable0 ++ Unsuitable1). + +unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs]) -> + Unsuitable = case Op of + bs_extract -> true; + bs_put -> true; + {float,_} -> true; + landingpad -> true; + peek_message -> true; + wait_timeout -> true; + _ -> false + end, + case Unsuitable of + true -> + [L|unsuitable_1(Bs)]; + false -> + unsuitable_1(Bs) + end; +unsuitable_1([{_,#b_blk{}}|Bs]) -> + unsuitable_1(Bs); +unsuitable_1([]) -> []. + +unsuitable_recv([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs], Blocks, Predecessors) -> + Ls = case Op of + remove_message -> + unsuitable_loop(L, Blocks, Predecessors); + recv_next -> + unsuitable_loop(L, Blocks, Predecessors); + _ -> + [] + end, + Ls ++ unsuitable_recv(Bs, Blocks, Predecessors); +unsuitable_recv([_|Bs], Blocks, Predecessors) -> + unsuitable_recv(Bs, Blocks, Predecessors); +unsuitable_recv([], _, _) -> []. + +unsuitable_loop(L, Blocks, Predecessors) -> + unsuitable_loop(L, Blocks, Predecessors, []). + +unsuitable_loop(L, Blocks, Predecessors, Acc) -> + Ps = maps:get(L, Predecessors), + unsuitable_loop_1(Ps, Blocks, Predecessors, Acc). + +unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) -> + case maps:get(P, Blocks) of + #b_blk{is=[#b_set{op=peek_message}|_]} -> + unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0); + #b_blk{} -> + case ordsets:is_element(P, Acc0) of + false -> + Acc1 = ordsets:add_element(P, Acc0), + Acc = unsuitable_loop(P, Blocks, Predecessors, Acc1), + unsuitable_loop_1(Ps, Blocks, Predecessors, Acc); + true -> + unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0) + end + end; +unsuitable_loop_1([], _, _, Acc) -> Acc. + +%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs, Dominators) -> +%% [{Variable,NewDefinitionBlock}] +%% Calculate new locations for get_tuple_element instructions. For each +%% variable, the new location is a block that dominates all uses of +%% variable and as near to the uses of as possible. If no such block +%% distinct from the block where the instruction currently is, the +%% variable will not be included in the result list. + +new_def_locations([{V,UsedIn}|Vs], Defs, Dom) -> + DefIn = maps:get(V, Defs), + case common_dom(UsedIn, DefIn, Dom) of + [] -> + new_def_locations(Vs, Defs, Dom); + [_|_]=BetterDef -> + L = most_dominated(BetterDef, Dom), + [{V,L}|new_def_locations(Vs, Defs, Dom)] + end; +new_def_locations([], _, _) -> []. + +common_dom([L|Ls], DefIn, Dom) -> + DomBy0 = maps:get(L, Dom), + DomBy = ordsets:subtract(DomBy0, maps:get(DefIn, Dom)), + common_dom_1(Ls, Dom, DomBy). + +common_dom_1(_, _, []) -> + []; +common_dom_1([L|Ls], Dom, [_|_]=DomBy0) -> + DomBy1 = maps:get(L, Dom), + DomBy = ordsets:intersection(DomBy0, DomBy1), + common_dom_1(Ls, Dom, DomBy); +common_dom_1([], _, DomBy) -> DomBy. + +most_dominated([L|Ls], Dom) -> + most_dominated(Ls, L, maps:get(L, Dom), Dom). + +most_dominated([L|Ls], L0, DomBy, Dom) -> + case member(L, DomBy) of + true -> + most_dominated(Ls, L0, DomBy, Dom); + false -> + most_dominated(Ls, L, maps:get(L, Dom), Dom) + end; +most_dominated([], L, _, _) -> L. + + +%% Move get_tuple_element instructions to their new locations. + +move_defs(V, From, To, Blocks) -> + #{From:=FromBlk0,To:=ToBlk0} = Blocks, + {Def,FromBlk} = remove_def(V, FromBlk0), + try insert_def(V, Def, ToBlk0) of + ToBlk -> + %%io:format("~p: ~p => ~p\n", [V,From,To]), + Blocks#{From:=FromBlk,To:=ToBlk} + catch + throw:not_possible -> + Blocks + end. + +remove_def(V, #b_blk{is=Is0}=Blk) -> + {Def,Is} = remove_def_is(Is0, V, []), + {Def,Blk#b_blk{is=Is}}. + +remove_def_is([#b_set{dst=#b_var{name=Dst}}=Def|Is], Dst, Acc) -> + {Def,reverse(Acc, Is)}; +remove_def_is([I|Is], Dst, Acc) -> + remove_def_is(Is, Dst, [I|Acc]). + +insert_def(V, Def, #b_blk{is=Is0}=Blk) -> + Is = insert_def_is(Is0, V, Def), + Blk#b_blk{is=Is}. + +insert_def_is([#b_set{op=phi}=I|Is], V, Def) -> + case member(V, beam_ssa:used(I)) of + true -> + throw(not_possible); + false -> + [I|insert_def_is(Is, V, Def)] + end; +insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) -> + Action0 = case Op of + call -> beyond; + 'catch_end' -> beyond; + set_tuple_element -> beyond; + timeout -> beyond; + _ -> here + end, + Action = case Is of + [#b_set{op=succeeded}|_] -> here; + _ -> Action0 + end, + case Action of + beyond -> + case member(V, beam_ssa:used(I)) of + true -> + %% The variable is used by this instruction. We must + %% place the definition before this instruction. + [Def|Is0]; + false -> + %% Place it beyond the current instruction. + [I|insert_def_is(Is, V, Def)] + end; + here -> + [Def|Is0] + end; +insert_def_is([], _V, Def) -> + [Def]. + + +%%% +%%% Common utilities. +%%% + +rel2fam(S0) -> + S1 = sofs:relation(S0), + S = sofs:rel2fam(S1), + sofs:to_external(S). + +sub(#b_set{op=phi,args=Args}=I, Sub) -> + I#b_set{args=[{sub_arg(A, Sub),P} || {A,P} <- Args]}; +sub(#b_set{args=Args}=I, Sub) -> + I#b_set{args=[sub_arg(A, Sub) || A <- Args]}; +sub(#b_br{bool=#b_var{}=Old}=Br, Sub) -> + New = sub_arg(Old, Sub), + Br#b_br{bool=New}; +sub(#b_switch{arg=#b_var{}=Old}=Sw, Sub) -> + New = sub_arg(Old, Sub), + Sw#b_switch{arg=New}; +sub(#b_ret{arg=#b_var{}=Old}=Ret, Sub) -> + New = sub_arg(Old, Sub), + Ret#b_ret{arg=New}; +sub(Last, _) -> Last. + +sub_arg(#b_remote{mod=Mod,name=Name}=Rem, Sub) -> + Rem#b_remote{mod=sub_arg(Mod, Sub),name=sub_arg(Name, Sub)}; +sub_arg(Old, Sub) -> + case Sub of + #{Old:=New} -> New; + #{} -> Old + end. diff --git a/lib/compiler/src/beam_ssa_pp.erl b/lib/compiler/src/beam_ssa_pp.erl new file mode 100644 index 0000000000..9daa2c2523 --- /dev/null +++ b/lib/compiler/src/beam_ssa_pp.erl @@ -0,0 +1,238 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(beam_ssa_pp). + +-export([format_function/1,format_instr/1,format_var/1]). + +-include("beam_ssa.hrl"). + +-spec format_function(beam_ssa:b_function()) -> iolist(). + +format_function(#b_function{anno=Anno0,args=Args, + bs=Blocks,cnt=Counter}) -> + #{func_info:={M,F,_}} = Anno0, + Anno = maps:without([func_info,location,live_intervals,registers], Anno0), + FuncAnno = case Anno0 of + #{live_intervals:=Intervals} -> + Anno0#{live_intervals:=maps:from_list(Intervals)}; + #{} -> + Anno0 + end, + ReachableBlocks = beam_ssa:rpo(Blocks), + All = maps:keys(Blocks), + Unreachable = ordsets:subtract(ordsets:from_list(All), + ordsets:from_list(ReachableBlocks)), + [case Anno0 of + #{location:={Filename,Line}} -> + io_lib:format("%% ~ts:~p\n", [Filename,Line]); + #{} -> + [] + end, + io_lib:format("%% Counter = ~p\n", [Counter]), + [format_anno(Key, Value) || + {Key,Value} <- lists:sort(maps:to_list(Anno))], + io_lib:format("function ~p:~p(~ts) {\n", [M,F,format_args(Args, FuncAnno)]), + [format_live_interval(Var, FuncAnno) || Var <- Args], + format_blocks(ReachableBlocks, Blocks, FuncAnno), + case Unreachable of + [] -> + []; + [_|_] -> + ["\n%% Unreachable blocks\n\n", + format_blocks(Unreachable, Blocks, FuncAnno)] + end, + + "}\n"]. + + +-spec format_instr(beam_ssa:b_set()) -> iolist(). + +format_instr(#b_set{}=I) -> + Cs = lists:flatten(format_instr(I#b_set{anno=#{}}, #{}, true)), + string:trim(Cs, leading); +format_instr(I0) -> + I = setelement(2, I0, #{}), + Cs = lists:flatten(format_terminator(I, #{})), + string:trim(Cs, both). + +-spec format_var(beam_ssa:b_var()) -> iolist(). + +format_var(V) -> + Cs = lists:flatten(format_var(V, #{})), + string:trim(Cs, leading). + +%%% +%%% Local functions. +%%% + +format_anno(Key, Map) when is_map(Map) -> + Sorted = lists:sort(maps:to_list(Map)), + [io_lib:format("%% ~s:\n", [Key]), + [io_lib:format("%% ~w => ~w\n", [K,V]) || {K,V} <- Sorted]]; +format_anno(Key, Value) -> + io_lib:format("%% ~s: ~p\n", [Key,Value]). + +format_blocks(Ls, Blocks, Anno) -> + PP = [format_block(L, Blocks, Anno) || L <- Ls], + lists:join($\n, PP). + +format_block(L, Blocks, FuncAnno) -> + #b_blk{anno=Anno,is=Is,last=Last} = maps:get(L, Blocks), + [case map_size(Anno) of + 0 -> []; + _ -> io_lib:format("%% ~p\n", [Anno]) + end, + io_lib:format("~p:", [L]), + format_instrs(Is, FuncAnno, true), + $\n, + format_terminator(Last, FuncAnno)]. + +format_instrs([I|Is], FuncAnno, First) -> + [$\n, + format_instr(I, FuncAnno, First), + format_instrs(Is, FuncAnno, false)]; +format_instrs([], _FuncAnno, _First) -> + []. + +format_instr(#b_set{anno=Anno,op=Op,dst=Dst,args=Args}, + FuncAnno, First) -> + AnnoStr = format_anno(Anno), + LiveIntervalStr = format_live_interval(Dst, FuncAnno), + [if + First -> + []; + AnnoStr =/= []; LiveIntervalStr =/= [] -> + $\n; + true -> + [] + end, + AnnoStr, + LiveIntervalStr, + io_lib:format(" ~s~ts = ~ts", [format_i_number(Anno), + format_var(Dst, FuncAnno), + format_op(Op)]), + case Args of + [] -> + []; + [_|_] -> + io_lib:format(" ~ts", [format_args(Args, FuncAnno)]) + end]. + +format_i_number(#{n:=N}) -> + io_lib:format("[~p] ", [N]); +format_i_number(#{}) -> []. + +format_terminator(#b_br{anno=A,bool=#b_literal{val=true},succ=Lbl}, _) -> + io_lib:format(" ~sbr label ~p\n", [format_i_number(A),Lbl]); +format_terminator(#b_br{anno=A,bool=#b_literal{val=false},fail=Lbl}, _) -> + io_lib:format(" ~sbr label ~p\n", [format_i_number(A),Lbl]); +format_terminator(#b_br{anno=A,bool=Bool,succ=Succ,fail=Fail}, FuncAnno) -> + io_lib:format(" ~sbr ~ts, label ~p, label ~p\n", + [format_i_number(A),format_arg(Bool, FuncAnno),Succ,Fail]); +format_terminator(#b_switch{anno=A,arg=Arg,fail=Fail,list=List}, FuncAnno) -> + io_lib:format(" ~sswitch ~ts, label ~p, ~ts\n", + [format_i_number(A),format_arg(Arg, FuncAnno),Fail, + format_list(List,FuncAnno)]); +format_terminator(#b_ret{anno=A,arg=Arg}, FuncAnno) -> + io_lib:format(" ~sret ~ts\n", [format_i_number(A),format_arg(Arg, FuncAnno)]). + +format_op({Prefix,Name}) -> + io_lib:format("~p:~p", [Prefix,Name]); +format_op(Name) -> + io_lib:format("~p", [Name]). + +format_register(#b_var{name=V}, #{registers:=Regs}) -> + {Tag,N} = maps:get(V, Regs), + io_lib:format("~p~p", [Tag,N]); +format_register(_, #{}) -> "". + +format_var(Var, FuncAnno) -> + VarString = format_var_1(Var), + case format_register(Var, FuncAnno) of + [] -> VarString; + [_|_]=Reg -> [Reg,$/,VarString] + end. + +format_var_1(#b_var{name={Name,Uniq}}) -> + if + is_atom(Name) -> + io_lib:format("~ts:~p", [Name,Uniq]); + is_integer(Name) -> + io_lib:format("_~p:~p", [Name,Uniq]) + end; +format_var_1(#b_var{name=Name}) when is_atom(Name) -> + atom_to_list(Name); +format_var_1(#b_var{name=Name}) when is_integer(Name) -> + "_"++integer_to_list(Name). + +format_args(Args, FuncAnno) -> + Ss = [format_arg(Arg, FuncAnno) || Arg <- Args], + lists:join(", ", Ss). + +format_arg(#b_var{}=Arg, FuncAnno) -> + format_var(Arg, FuncAnno); +format_arg(#b_literal{val=Val}, _FuncAnno) -> + io_lib:format("literal ~p", [Val]); +format_arg(#b_remote{mod=Mod,name=Name,arity=Arity}, FuncAnno) -> + io_lib:format("remote (~ts):(~ts)/~p", + [format_arg(Mod, FuncAnno),format_arg(Name, FuncAnno),Arity]); +format_arg(#b_local{name=Name,arity=Arity}, FuncAnno) -> + io_lib:format("local ~ts/~p", [format_arg(Name, FuncAnno),Arity]); +format_arg({Value,Label}, FuncAnno) when is_integer(Label) -> + io_lib:format("{ ~ts, ~p }", [format_arg(Value, FuncAnno),Label]); +format_arg(Other, _) -> + io_lib:format("*** ~p ***", [Other]). + +format_list(List, FuncAnno) -> + Ss = [io_lib:format("{ ~ts, ~ts }", [format_arg(Val, FuncAnno),format_label(L)]) || + {Val,L} <- List], + io_lib:format("[ ~ts ]", [lists:join(", ", Ss)]). + +format_label(L) -> + ["label ",integer_to_list(L)]. + +format_anno(#{n:=_}=Anno) -> + format_anno(maps:remove(n, Anno)); +format_anno(#{location:={File,Line}}=Anno0) -> + Anno = maps:remove(location, Anno0), + [io_lib:format(" %% ~ts:~p\n", [File,Line])|format_anno_1(Anno)]; +format_anno(Anno) -> + format_anno_1(Anno). + +format_anno_1(Anno) -> + case map_size(Anno) of + 0 -> + []; + _ -> + [io_lib:format(" %% Anno: ~p\n", [Anno])] + end. + +format_live_interval(#b_var{name=V}=Dst, #{live_intervals:=Intervals}) -> + case Intervals of + #{V:=Rs0} -> + Rs1 = [io_lib:format("~p..~p", [Start,End]) || + {Start,End} <- Rs0], + Rs = lists:join(" ", Rs1), + io_lib:format(" %% ~ts: ~s\n", [format_var_1(Dst),Rs]); + #{} -> + [] + end; +format_live_interval(_, _) -> []. + diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl new file mode 100644 index 0000000000..bbc3739eb5 --- /dev/null +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -0,0 +1,2437 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Purpose: Prepare for code generation, including register allocation. +%% +%% The output of this compiler pass is still in the SSA format, but +%% it has been annotated and transformed to help the code generator. +%% +%% * Some instructions are translated to other instructions closer to +%% the BEAM instructions. For example, the put_tuple instruction is +%% broken apart into the put_tuple_arity and put_tuple_elements +%% instructions. Similary, the binary matching instructions are +%% transformed from the optimization-friendly internal format to +%% instruction more similar to the actual BEAM instructions. +%% +%% * Blocks that will need an instruction for allocating a stack frame +%% are annotated with a {frame_size,Size} annotation. +%% +%% * 'copy' instructions are added for all variables that need +%% to be saved to the stack frame. Additional 'copy' instructions +%% can be added as an optimization to reuse y registers (see +%% the copy_retval sub pass). +%% +%% * Each function is annotated with a {register,RegisterMap} +%% annotation that maps each variable to a BEAM register. The linear +%% scan algorithm is used to allocate registers. +%% +%% There are four kind of registers. x, y, fr (floating point register), +%% and z. A variable will be allocated to a z register if it is only +%% used by the instruction following the instruction that defines the +%% the variable. The code generator will typically combine those +%% instructions to a test instruction. z registers are also used for +%% some instructions that don't have a return value. +%% +%% References: +%% +%% [1] H. Mössenböck and M. Pfeiffer. Linear scan register allocation +%% in the context of SSA form and register constraints. In Proceedings +%% of the International Conference on Compiler Construction, pages +%% 229–246. LNCS 2304, Springer-Verlag, 2002. +%% +%% [2] C. Wimmer and H. Mössenböck. Optimized interval splitting in a +%% linear scan register allocator. In Proceedings of the ACM/USENIX +%% International Conference on Virtual Execution Environments, pages +%% 132–141. ACM Press, 2005. +%% +%% [3] C. Wimmer and M. Franz. Linear Scan Register Allocation on SSA +%% Form. In Proceedings of the International Symposium on Code +%% Generation and Optimization, pages 170-179. ACM Press, 2010. +%% + +-module(beam_ssa_pre_codegen). + +-export([module/2]). + +-include("beam_ssa.hrl"). + +-import(lists, [all/2,any/2,append/1,duplicate/2, + foldl/3,last/1,map/2,member/2,partition/2, + reverse/1,reverse/2,sort/1,zip/2]). + +-spec module(beam_ssa:b_module(), [compile:option()]) -> + {'ok',beam_ssa:b_module()}. + +module(#b_module{body=Fs0}=Module, Opts) -> + ExtraAnnos = proplists:get_bool(dprecg, Opts), + Ps = passes(ExtraAnnos), + Fs = functions(Fs0, Ps), + {ok,Module#b_module{body=Fs}}. + +functions([F|Fs], Ps) -> + [function(F, Ps)|functions(Fs, Ps)]; +functions([], _Ps) -> []. + +-type b_var() :: beam_ssa:b_var(). +-type var_name() :: beam_ssa:var_name(). +-type instr_number() :: pos_integer(). +-type range() :: {instr_number(),instr_number()}. +-type reg_num() :: beam_asm:reg_num(). +-type xreg() :: {'x',reg_num()}. +-type yreg() :: {'y',reg_num()}. +-type ypool() :: {'y',beam_ssa:label()}. +-type reservation() :: 'fr' | {'prefer',xreg()} | 'x' | {'x',xreg()} | + ypool() | {yreg(),ypool()} | 'z'. +-type ssa_register() :: beam_ssa_codegen:ssa_register(). + +-define(TC(Body), tc(fun() -> Body end, ?FILE, ?LINE)). +-record(st, {ssa :: beam_ssa:block_map(), + args :: [b_var()], + cnt :: beam_ssa:label(), + frames=[] :: [beam_ssa:label()], + intervals=[] :: [{b_var(),[range()]}], + aliases=[] :: [{b_var(),b_var()}], + res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()}, + regs=#{} :: #{b_var():=ssa_register()}, + extra_annos=[] :: [{atom(),term()}] + }). +-define(PASS(N), {N,fun N/1}). + +passes(ExtraAnnos) -> + Ps = [?PASS(assert_no_critical_edges), + + %% Preliminaries. + ?PASS(fix_bs), + ?PASS(sanitize), + ?PASS(fix_tuples), + ?PASS(place_frames), + ?PASS(fix_receives), + + %% Find and reserve Y registers. + ?PASS(find_yregs), + ?PASS(reserve_yregs), + + %% Improve reuse of Y registers to potentially + %% reduce the size of the stack frame. + ?PASS(copy_retval), + ?PASS(opt_get_list), + + %% Calculate live intervals. + ?PASS(number_instructions), + ?PASS(live_intervals), + ?PASS(remove_unsuitable_aliases), + ?PASS(reserve_regs), + ?PASS(merge_intervals), + + %% If needed for a .precg file, save the live intervals + %% so they can be included in an annotation. + case ExtraAnnos of + false -> ignore; + true -> ?PASS(save_live_intervals) + end, + + %% Allocate registers. + ?PASS(linear_scan), + ?PASS(fix_aliased_regs), + ?PASS(frame_size), + ?PASS(turn_yregs)], + case ExtraAnnos of + false -> [P || P <- Ps, P =/= ignore]; + true -> Ps + end. + +function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0, Ps) -> + try + St0 = #st{ssa=Blocks0,args=Args,cnt=Count0}, + St = compile:run_sub_passes(Ps, St0), + #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St, + F1 = add_extra_annos(F0, ExtraAnnos), + F = beam_ssa:add_anno(registers, Regs, F1), + F#b_function{bs=Blocks,cnt=Count} + catch + Class:Error:Stack -> + #{func_info:={_,Name,Arity}} = Anno, + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +save_live_intervals(#st{intervals=Intervals}=St) -> + St#st{extra_annos=[{live_intervals,Intervals}]}. + +fix_aliased_regs(#st{aliases=Aliases,regs=Regs}=St) -> + St#st{regs=fix_aliased_regs(Aliases, Regs)}. + +fix_aliased_regs([{Alias,V}|Aliases], Regs) -> + #{V:=Reg} = Regs, + fix_aliased_regs(Aliases, Regs#{Alias=>Reg}); +fix_aliased_regs([], Regs) -> Regs. + +%% Add extra annotations when a .precg listing file is being produced. +add_extra_annos(F, Annos) -> + foldl(fun({Name,Value}, Acc) -> + beam_ssa:add_anno(Name, Value, Acc) + end, F, Annos). + +%% assert_no_critical_edges(St0) -> St. +%% The code generator will not work if there are critial edges. +%% Abort if any critical edges are found. + +assert_no_critical_edges(#st{ssa=Blocks}=St) -> + F = fun assert_no_ces/3, + beam_ssa:fold_rpo(F, Blocks, Blocks), + St. + +assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) -> + %% This block has multiple predecessors. Make sure that none + %% of the precessors have more than one successor. + true = all(fun({_,P}) -> + length(beam_ssa:successors(P, Blocks)) =:= 1 + end, Phis), %Assertion. + Blocks; +assert_no_ces(_, _, Blocks) -> Blocks. + +%% fix_bs(St0) -> St. +%% Fix up the binary matching instructions: +%% +%% * Insert bs_save and bs_restore instructions where needed. +%% +%% * Combine bs_match and bs_extract instructions to bs_get +%% instructions. + +fix_bs(#st{ssa=Blocks,cnt=Count0}=St) -> + F = fun(#b_set{op=bs_start_match,dst=Dst}, A) -> + %% Mark the root of the match context list. + [{Dst,{context,Dst}}|A]; + (#b_set{op=bs_match,dst=Dst,args=[_,ParentCtx|_]}, A) -> + %% Link this match context the previous match context. + [{Dst,ParentCtx}|A]; + (_, A) -> + A + end, + case beam_ssa:fold_instrs_rpo(F, [0], [],Blocks) of + [] -> + %% No binary matching in this function. + St; + [_|_]=M -> + CtxChain = maps:from_list(M), + Linear0 = beam_ssa:linearize(Blocks), + + %% Insert bs_save / bs_restore instructions where needed. + {Linear1,Count} = bs_save_restore(Linear0, CtxChain, Count0), + + %% Rename instructions. + Linear = bs_instrs(Linear1, CtxChain, []), + + St#st{ssa=maps:from_list(Linear),cnt=Count} + end. + + +%% Insert bs_save and bs_restore instructions as needed. + +bs_save_restore(Linear0, CtxChain, Count0) -> + Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}), + Rs = maps:values(Rs0), + S0 = sofs:relation(Rs, [{context,save_point}]), + S1 = sofs:relation_to_family(S0), + S = sofs:to_external(S1), + Slots = make_save_point_dict(S, []), + {Saves,Count1} = make_save_map(Rs, Slots, Count0, []), + {Restores,Count} = make_restore_map(maps:to_list(Rs0), Slots, Count1, []), + + %% Now insert all saves and restores. + {bs_insert(Linear0, Saves, Restores, Slots),Count}. + +make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) -> + Ignored = #b_var{name={'@ssa_ignored',Count}}, + case make_slot(Ps, Slots) of + #b_literal{val=start} -> + make_save_map(T, Slots, Count, Acc); + Slot -> + I = #b_set{op=bs_save,dst=Ignored,args=[Ctx,Slot]}, + make_save_map(T, Slots, Count+1, [{Save,I}|Acc]) + end; +make_save_map([], _, Count, Acc) -> + {maps:from_list(Acc),Count}. + +make_restore_map([{Bef,{Ctx,_}=Ps}|T], Slots, Count, Acc) -> + Ignored = #b_var{name={'@ssa_ignored',Count}}, + I = #b_set{op=bs_restore,dst=Ignored,args=[Ctx,make_slot(Ps, Slots)]}, + make_restore_map(T, Slots, Count+1, [{Bef,I}|Acc]); +make_restore_map([], _, Count, Acc) -> + {maps:from_list(Acc),Count}. + +make_slot({Same,Same}, _Slots) -> + #b_literal{val=start}; +make_slot({_,_}=Ps, Slots) -> + #b_literal{val=maps:get(Ps, Slots)}. + +make_save_point_dict([{Ctx,Pts}|T], Acc0) -> + Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0), + make_save_point_dict(T, Acc); +make_save_point_dict([], Acc) -> + maps:from_list(Acc). + +make_save_point_dict_1([Ctx|T], Ctx, I, Acc) -> + %% Special {atom,start} save point. Does not need a + %% bs_save instruction. + make_save_point_dict_1(T, Ctx, I, Acc); +make_save_point_dict_1([H|T], Ctx, I, Acc) -> + make_save_point_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]); +make_save_point_dict_1([], Ctx, I, Acc) -> + [{Ctx,I}|Acc]. + +bs_restores([{L,#b_blk{is=Is,last=Last}}|Bs], CtxChain, D0, Rs0) -> + FPos = case D0 of + #{L:=Pos0} -> Pos0; + #{} -> #{} + end, + {SPos,Rs} = bs_restores_is(Is, CtxChain, FPos, Rs0), + D = bs_update_successors(Last, SPos, FPos, D0), + bs_restores(Bs, CtxChain, D, Rs); +bs_restores([], _, _, Rs) -> Rs. + +bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) -> + join_positions([{Succ,SPos},{Fail,FPos}], D); +bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, FPos, D) -> + SPos = FPos, %Assertion. + Update = [{L,SPos} || {_,L} <- List] ++ [{Fail,SPos}], + join_positions(Update, D); +bs_update_successors(#b_ret{}, _, _, D) -> D. + +join_positions([{L,MapPos0}|T], D) -> + case D of + #{L:=MapPos0} -> + %% Same map. + join_positions(T, D); + #{L:=MapPos1} -> + %% Different maps. + MapPos = join_positions_1(MapPos0, MapPos1), + join_positions(T, D#{L:=MapPos}); + #{} -> + join_positions(T, D#{L=>MapPos0}) + end; +join_positions([], D) -> D. + +join_positions_1(MapPos0, MapPos1) -> + MapPos2 = maps:map(fun(Start, Pos) -> + case MapPos0 of + #{Start:=Pos} -> Pos; + #{Start:=_} -> unknown; + #{} -> Pos + end + end, MapPos1), + maps:merge(MapPos0, MapPos2). + +bs_restores_is([#b_set{op=bs_start_match,dst=Start}|Is], + CtxChain, PosMap0, Rs) -> + PosMap = PosMap0#{Start=>Start}, + bs_restores_is(Is, CtxChain, PosMap, Rs); +bs_restores_is([#b_set{op=bs_match,dst=NewPos,args=Args}=I|Is], + CtxChain, PosMap0, Rs0) -> + Start = bs_subst_ctx(NewPos, CtxChain), + [_,FromPos|_] = Args, + case PosMap0 of + #{Start:=FromPos} -> + %% Same position, no restore needed. + PosMap = case bs_match_type(I) of + plain -> + %% Update position to new position. + PosMap0#{Start:=NewPos}; + _ -> + %% Position will not change (test_unit + %% instruction or no instruction at + %% all). + PosMap0#{Start:=FromPos} + end, + bs_restores_is(Is, CtxChain, PosMap, Rs0); + #{Start:=_} -> + %% Different positions, might need a restore instruction. + case bs_match_type(I) of + none -> + %% The tail test will be optimized away. + %% No need to do a restore. + PosMap = PosMap0#{Start:=FromPos}, + bs_restores_is(Is, CtxChain, PosMap, Rs0); + test_unit -> + %% This match instruction will be replaced by + %% a test_unit instruction. We will need a + %% restore. The new position will be the position + %% restored to (NOT NewPos). + PosMap = PosMap0#{Start:=FromPos}, + Rs = Rs0#{NewPos=>{Start,FromPos}}, + bs_restores_is(Is, CtxChain, PosMap, Rs); + plain -> + %% Match or skip. Position will be changed. + PosMap = PosMap0#{Start:=NewPos}, + Rs = Rs0#{NewPos=>{Start,FromPos}}, + bs_restores_is(Is, CtxChain, PosMap, Rs) + end + end; +bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is], + CtxChain, PosMap, Rs) -> + Start = bs_subst_ctx(FromPos, CtxChain), + #{Start:=FromPos} = PosMap, %Assertion. + bs_restores_is(Is, CtxChain, PosMap, Rs); +bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is], + CtxChain, PosMap0, Rs0) + when Op =:= bs_test_tail; + Op =:= call -> + {Rs,PosMap} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0), + bs_restores_is(Is, CtxChain, PosMap, Rs); +bs_restores_is([_|Is], CtxChain, PosMap, Rs) -> + bs_restores_is(Is, CtxChain, PosMap, Rs); +bs_restores_is([], _CtxChain, PosMap, Rs) -> + {PosMap,Rs}. + + +bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx, + #b_literal{val=binary},_Flags, + #b_literal{val=all},#b_literal{val=U}]}) -> + case U of + 1 -> none; + _ -> test_unit + end; +bs_match_type(_) -> + plain. + +bs_restore_args([#b_var{}=Arg|Args], PosMap0, CtxChain, Dst, Rs0) -> + Start = bs_subst_ctx(Arg, CtxChain), + case PosMap0 of + #{Start:=Arg} -> + %% Same position, no restore needed. + bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0); + #{Start:=_} -> + %% Different positions, need a restore instruction. + PosMap = PosMap0#{Start:=Arg}, + Rs = Rs0#{Dst=>{Start,Arg}}, + bs_restore_args(Args, PosMap, CtxChain, Dst, Rs); + #{} -> + %% Not a match context. + bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0) + end; +bs_restore_args([_|Args], PosMap, CtxChain, Dst, Rs) -> + bs_restore_args(Args, PosMap, CtxChain, Dst, Rs); +bs_restore_args([], PosMap, _CtxChain, _Dst, Rs) -> + {Rs,PosMap}. + +%% Insert all bs_save and bs_restore instructions. + +bs_insert([{L,#b_blk{is=Is0}=Blk}|Bs0], Saves, Restores, Slots) -> + Is = bs_insert_is_1(Is0, Restores, Slots), + Bs = bs_insert_saves(Is, Bs0, Saves), + [{L,Blk#b_blk{is=Is}}|bs_insert(Bs, Saves, Restores, Slots)]; +bs_insert([], _, _, _) -> []. + +bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is], Restores, Slots) -> + if + Op =:= bs_test_tail; + Op =:= bs_match; + Op =:= call -> + Rs = case Restores of + #{Dst:=R} -> [R]; + #{} -> [] + end, + Rs ++ [I0|bs_insert_is_1(Is, Restores, Slots)]; + Op =:= bs_start_match -> + NumSlots = case Slots of + #{Dst:=NumSlots0} -> NumSlots0; + #{} -> 0 + end, + I = beam_ssa:add_anno(num_slots, NumSlots, I0), + [I|bs_insert_is_1(Is, Restores, Slots)]; + true -> + [I0|bs_insert_is_1(Is, Restores, Slots)] + end; +bs_insert_is_1([], _, _) -> []. + +bs_insert_saves([#b_set{dst=Dst}|Is], Bs, Saves) -> + case Saves of + #{Dst:=S} -> + bs_insert_save(S, Bs); + #{} -> + bs_insert_saves(Is, Bs, Saves) + end; +bs_insert_saves([], Bs, _) -> Bs. + +bs_insert_save(Save, [{L,#b_blk{is=Is0}=Blk}|Bs]) -> + Is = case Is0 of + [#b_set{op=bs_extract}=Ex|Is1] -> + [Ex,Save|Is1]; + _ -> + [Save|Is0] + end, + [{L,Blk#b_blk{is=Is}}|Bs]. + +%% Translate bs_match instructions to bs_get, bs_match_string, +%% or bs_skip. Also rename match context variables to use the +%% variable assigned to by the start_match instruction. + +bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) -> + case bs_instrs_is(Is0, CtxChain, []) of + [#b_set{op=bs_extract,dst=Dst,args=[Ctx]}|Is] -> + %% Drop this instruction. Rewrite the corresponding + %% bs_match instruction in the previous block to + %% a bs_get instruction. + Acc = bs_combine(Dst, Ctx, Acc0), + bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc]); + Is -> + bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc0]) + end; +bs_instrs([], _, Acc) -> + reverse(Acc). + +bs_instrs_is([#b_set{op=Op,args=Args0}=I0|Is], CtxChain, Acc) -> + Args = [bs_subst_ctx(A, CtxChain) || A <- Args0], + I1 = I0#b_set{args=Args}, + I = case {Op,Args} of + {bs_match,[#b_literal{val=skip},Ctx,Type|As]} -> + I1#b_set{op=bs_skip,args=[Type,Ctx|As]}; + {bs_match,[#b_literal{val=string},Ctx|As]} -> + I1#b_set{op=bs_match_string,args=[Ctx|As]}; + {_,_} -> + I1 + end, + bs_instrs_is(Is, CtxChain, [I|Acc]); +bs_instrs_is([], _, Acc) -> + reverse(Acc). + +%% Combine a bs_match instruction with the destination register +%% taken from a bs_extract instruction. + +bs_combine(Dst, Ctx, [{L,#b_blk{is=Is0}=Blk}|Acc]) -> + [#b_set{}=Succeeded, + #b_set{op=bs_match,args=[Type,_|As]}=BsMatch|Is1] = reverse(Is0), + Is = reverse(Is1, [BsMatch#b_set{op=bs_get,dst=Dst,args=[Type,Ctx|As]}, + Succeeded#b_set{args=[Dst]}]), + [{L,Blk#b_blk{is=Is}}|Acc]. + +bs_subst_ctx(#b_var{}=Var, CtxChain) -> + case CtxChain of + #{Var:={context,Ctx}} -> + Ctx; + #{Var:=ParentCtx} -> + bs_subst_ctx(ParentCtx, CtxChain); + #{} -> + %% Not a match context variable. + Var + end; +bs_subst_ctx(Other, _CtxChain) -> + Other. + +%% sanitize(St0) -> St. +%% Remove constructs that can cause problems later: +%% +%% * Unreachable blocks may cause problems for determination of +%% dominators. +%% +%% * Some instructions (such as get_hd) don't accept literal +%% arguments. Evaluate the instructions and remove them. + +sanitize(#st{ssa=Blocks0,cnt=Count0}=St) -> + Ls = beam_ssa:rpo(Blocks0), + {Blocks,Count} = sanitize(Ls, Count0, Blocks0, #{}), + St#st{ssa=Blocks,cnt=Count}. + +sanitize([L|Ls], Count0, Blocks0, Values0) -> + #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks0), + case sanitize_is(Is0, Count0, Values0, false, []) of + no_change -> + sanitize(Ls, Count0, Blocks0, Values0); + {Is,Count,Values} -> + Blk = Blk0#b_blk{is=Is}, + Blocks = Blocks0#{L:=Blk}, + sanitize(Ls, Count, Blocks, Values) + end; +sanitize([], Count, Blocks0, Values) -> + Blocks = if + map_size(Values) =:= 0 -> + Blocks0; + true -> + beam_ssa:rename_vars(Values, [0], Blocks0) + end, + + %% Unreachable blocks can cause problems for the dominator calculations. + Ls = beam_ssa:rpo(Blocks), + Reachable = gb_sets:from_list(Ls), + {case map_size(Blocks) =:= gb_sets:size(Reachable) of + true -> Blocks; + false -> remove_unreachable(Ls, Blocks, Reachable, []) + end,Count}. + +sanitize_is([#b_set{op=get_map_element, + args=[#b_literal{}=Map,Key]}=I0|Is], + Count0, Values, _Changed, Acc) -> + {MapVarName,Count} = new_var_name('@ssa_map', Count0), + MapVar = #b_var{name=MapVarName}, + I = I0#b_set{args=[MapVar,Key]}, + Copy = #b_set{op=copy,dst=MapVar,args=[Map]}, + sanitize_is(Is, Count, Values, true, [I,Copy|Acc]); +sanitize_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args0}=I0|Is0], + Count, Values, Changed, Acc) -> + Args = map(fun(#b_var{name=V}=Var) -> + case Values of + #{V:=New} -> New; + #{} -> Var + end; + (Lit) -> Lit + end, Args0), + case sanitize_instr(Op, Args, I0) of + {value,Value0} -> + Value = #b_literal{val=Value0}, + sanitize_is(Is0, Count, Values#{Dst=>Value}, true, Acc); + {ok,I} -> + sanitize_is(Is0, Count, Values, true, [I|Acc]); + ok -> + sanitize_is(Is0, Count, Values, Changed, [I0|Acc]) + end; +sanitize_is([], Count, Values, Changed, Acc) -> + case Changed of + true -> + {reverse(Acc),Count,Values}; + false -> + no_change + end. + +sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) -> + case erl_bifs:is_pure(erlang, Bif, 1) of + false -> + ok; + true -> + try + {value,erlang:Bif(Lit)} + catch + error:_ -> + ok + end + end; +sanitize_instr({bif,Bif}, [#b_literal{val=Lit1},#b_literal{val=Lit2}], _I) -> + true = erl_bifs:is_pure(erlang, Bif, 2), %Assertion. + try + {value,erlang:Bif(Lit1, Lit2)} + catch + error:_ -> + ok + end; +sanitize_instr(get_hd, [#b_literal{val=[Hd|_]}], _I) -> + {value,Hd}; +sanitize_instr(get_tl, [#b_literal{val=[_|Tl]}], _I) -> + {value,Tl}; +sanitize_instr(get_tuple_element, [#b_literal{val=T}, + #b_literal{val=I}], _I) + when I < tuple_size(T) -> + {value,element(I+1, T)}; +sanitize_instr(is_nonempty_list, [#b_literal{val=Lit}], _I) -> + {value,case Lit of + [_|_] -> true; + _ -> false + end}; +sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple}, + #b_literal{val=Arity}, + #b_literal{val=Tag}], _I) + when is_integer(Arity), is_atom(Tag) -> + if + tuple_size(Tuple) =:= Arity, element(1, Tuple) =:= Tag -> + {value,true}; + true -> + {value,false} + end; +sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], I0) -> + if + is_integer(Sz), Sz >= 0 -> ok; + true -> {ok,sanitize_badarg(I0)} + end; +sanitize_instr(bs_init, [#b_literal{val=append},_,#b_literal{val=Sz}|_], I0) -> + if + is_integer(Sz), Sz >= 0 -> ok; + true -> {ok,sanitize_badarg(I0)} + end; +sanitize_instr(succeeded, [#b_literal{}], _I) -> + {value,true}; +sanitize_instr(_, _, _) -> ok. + +sanitize_badarg(I) -> + Func = #b_remote{mod=#b_literal{val=erlang}, + name=#b_literal{val=error},arity=1}, + I#b_set{op=call,args=[Func,#b_literal{val=badarg}]}. + +remove_unreachable([L|Ls], Blocks, Reachable, Acc) -> + #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks), + case split_phis(Is0) of + {[_|_]=Phis,Rest} -> + Is = [prune_phi(Phi, Reachable) || Phi <- Phis] ++ Rest, + Blk = Blk0#b_blk{is=Is}, + remove_unreachable(Ls, Blocks, Reachable, [{L,Blk}|Acc]); + {[],_} -> + remove_unreachable(Ls, Blocks, Reachable, [{L,Blk0}|Acc]) + end; +remove_unreachable([], _Blocks, _, Acc) -> + maps:from_list(Acc). + +prune_phi(#b_set{args=Args0}=Phi, Reachable) -> + Args = [A || {_,Pred}=A <- Args0, + gb_sets:is_element(Pred, Reachable)], + Phi#b_set{args=Args}. + +%%% +%%% Fix tuples. +%%% + +%% fix_tuples(St0) -> St. +%% We must split tuple creation into two instruction to mirror the +%% the way tuples are created in BEAM. Each put_tuple instruction is +%% split into put_tuple_arity followed by put_tuple_elements. + +fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) -> + F = fun (#b_set{op=put_tuple,args=Args}=Put, C0) -> + Arity = #b_literal{val=length(Args)}, + {VarName,C} = new_var_name('@ssa_ignore', C0), + Ignore = #b_var{name=VarName}, + {[Put#b_set{op=put_tuple_arity,args=[Arity]}, + #b_set{dst=Ignore,op=put_tuple_elements,args=Args}],C}; + (I, C) -> {[I],C} + end, + {Blocks,Count} = beam_ssa:flatmapfold_instrs_rpo(F, [0], Count0, Blocks0), + St#st{ssa=Blocks,cnt=Count}. + +%%% +%%% Find out where frames should be placed. +%%% + +%% place_frames(St0) -> St. +%% Return a list of the labels for the blocks that need stack frame +%% allocation instructions. +%% +%% This function attempts to place stack frames as tight as possible +%% around the code, to avoid building stack frames for code paths +%% that don't need one. +%% +%% Stack frames are placed in blocks that dominate all of their +%% descendants. That guarantees that the deallocation instructions +%% cannot be reached from other execution paths that didn't set up +%% a stack frame or set up a stack frame with a different size. + +place_frames(#st{ssa=Blocks}=St) -> + Doms = beam_ssa:dominators(Blocks), + Ls = beam_ssa:rpo(Blocks), + Tried = gb_sets:empty(), + Frames0 = [], + {Frames,_} = place_frames_1(Ls, Blocks, Doms, Tried, Frames0), + St#st{frames=Frames}. + +place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) -> + Blk = maps:get(L, Blocks), + case need_frame(Blk) of + true -> + %% This block needs a frame. Try to place it here. + {Frames,Tried} = do_place_frame(L, Blocks, Doms, Tried0, Frames0), + + %% Successfully placed. Try to place more frames in descendants + %% that are not dominated by this block. + place_frames_1(Ls, Blocks, Doms, Tried, Frames); + false -> + try + place_frames_1(Ls, Blocks, Doms, Tried0, Frames0) + catch + throw:{need_frame,For,Tried1}=Reason -> + %% An descendant block needs a stack frame. Try to + %% place it here. + case is_dominated_by(For, L, Doms) of + true -> + %% Try to place a frame here. + {Frames,Tried} = do_place_frame(L, Blocks, Doms, + Tried1, Frames0), + place_frames_1(Ls, Blocks, Doms, Tried, Frames); + false -> + %% Wrong place. This block does not dominate + %% the block that needs the frame. Pass it on + %% to our ancestors. + throw(Reason) + end + end + end; +place_frames_1([], _, _, Tried, Frames) -> + {Frames,Tried}. + +%% do_place_frame(Label, Blocks, Dominators, Tried0, Frames0) -> {Frames,Tried}. +%% Try to place a frame in this block. This function returns +%% successfully if it either succeds at placing a frame in this +%% block, if an ancestor that dominates this block has already placed +%% a frame, or if we have already tried to put a frame in this block. +%% +%% An {need_frame,Label,Tried} exception will be thrown if this block +%% block is not suitable for having a stack frame (i.e. it does not dominate +%% all of its descendants). The exception means that an ancestor will have to +%% place the frame needed by this block. + +do_place_frame(L, Blocks, Doms, Tried0, Frames) -> + case gb_sets:is_element(L, Tried0) of + true -> + %% We have already tried to put a frame in this block. + {Frames,Tried0}; + false -> + %% Try to place a frame in this block. + Tried = gb_sets:insert(L, Tried0), + case place_frame_here(L, Blocks, Doms, Frames) of + yes -> + %% We need a frame and it is safe to place it here. + {[L|Frames],Tried}; + no -> + %% An ancestor has a frame. Not needed. + {Frames,Tried}; + ancestor -> + %% This block does not dominate all of its + %% descendants. We must place the frame in + %% an ancestor. + throw({need_frame,L,Tried}) + end + end. + +%% place_frame_here(Label, Blocks, Doms, Frames) -> no|yes|ancestor. +%% Determine whether a frame should be placed in block Label. + +place_frame_here(L, Blocks, Doms, Frames) -> + B0 = any(fun(DomBy) -> + is_dominated_by(L, DomBy, Doms) + end, Frames), + case B0 of + true -> + %% This block is dominated by an ancestor block that + %% defines a frame. Not needed/allowed to put a frame + %% here. + no; + false -> + %% No frame in any ancestor. We need a frame. + %% Now check whether the frame can be placed here. + %% If this block dominates all of its descendants + %% and the predecessors of any phi nodes it can be + %% placed here. + Descendants = beam_ssa:rpo([L], Blocks), + PhiPredecessors = phi_predecessors(L, Blocks), + MustDominate = ordsets:from_list(PhiPredecessors ++ Descendants), + Dominates = all(fun(?BADARG_BLOCK) -> + %% This block defines no variables and calls + %% erlang:error(badarg). It does not matter + %% whether L dominates ?BADARG_BLOCK or not; + %% it is still safe to put the frame in L. + true; + (Bl) -> + is_dominated_by(Bl, L, Doms) + end, MustDominate), + + %% Also, this block must not be a loop header. + IsLoopHeader = is_loop_header(L, Blocks), + case Dominates andalso not IsLoopHeader of + true -> yes; + false -> ancestor + end + end. + +%% phi_predecessors(Label, Blocks) -> +%% Return all predecessors referenced in phi nodes. + +phi_predecessors(L, Blocks) -> + #b_blk{is=Is} = maps:get(L, Blocks), + [P || #b_set{op=phi,args=Args} <- Is, {_,P} <- Args]. + +%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false. +%% Test whether block Label is dominated by block DominatedBy. + +is_dominated_by(L, DomBy, Doms) -> + DominatedBy = maps:get(L, Doms), + ordsets:is_element(DomBy, DominatedBy). + +%% need_frame(#b_blk{}) -> true|false. +%% Test whether any of the instructions in the block requires a stack frame. + +need_frame(#b_blk{is=Is,last=#b_ret{arg=Ret}}) -> + need_frame_1(Is, {return,Ret}); +need_frame(#b_blk{is=Is}) -> + need_frame_1(Is, body). + +need_frame_1([#b_set{op=make_fun,dst=#b_var{name=Fun}}|Is], {return,_}=Context) -> + %% Since make_fun clobbers X registers, a stack frame is needed if + %% any of the following instructions use any other variable than + %% the one holding the reference to the created fun. + need_frame_1(Is, Context) orelse + case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=#b_var{name=Fun}}}) of + [Fun] -> false; + [_|_] -> true + end; +need_frame_1([#b_set{op=new_try_tag}|_], _) -> + true; +need_frame_1([#b_set{op=call,dst=Val}]=Is, {return,Ret}) -> + if + Val =:= Ret -> need_frame_1(Is, tail); + true -> need_frame_1(Is, body) + end; +need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) -> + case Func of + #b_remote{mod=#b_literal{val=Mod}, + name=#b_literal{val=Name}, + arity=Arity} -> + case erl_bifs:is_exit_bif(Mod, Name, Arity) of + true -> + false; + false -> + Context =:= body orelse + Is =/= [] orelse + is_trap_bif(Mod, Name, Arity) + end; + #b_remote{} -> + %% This is an apply(), which always needs a frame. + true; + #b_var{} -> + %% A fun call always needs a frame. + true; + _ -> + Context =:= body orelse Is =/= [] + end; +need_frame_1([I|Is], Context) -> + beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context); +need_frame_1([], _) -> false. + +%% is_trap_bif(Mod, Name, Arity) -> true|false. +%% Test whether we need a stack frame for this BIF. + +is_trap_bif(erlang, '!', 2) -> true; +is_trap_bif(erlang, link, 1) -> true; +is_trap_bif(erlang, unlink, 1) -> true; +is_trap_bif(erlang, monitor_node, 2) -> true; +is_trap_bif(erlang, group_leader, 2) -> true; +is_trap_bif(erlang, exit, 2) -> true; +is_trap_bif(_, _, _) -> false. + +%%% +%%% Fix variables used in matching in receive. +%%% +%%% The loop_rec/2 instruction may return a reference to a +%%% message outside of any heap or heap fragment. If the message +%%% does not match, it is not allowed to store any reference to +%%% the message (or part of the message) on the stack. If we do, +%%% the message will be corrupted if there happens to be a GC. +%%% +%%% Here we make sure to introduce copies of variables that are +%%% matched out and subsequently used after the remove_message/0 +%%% instructions. That will make sure that only X registers are +%%% used during matching. +%%% +%%% Depending on where variables are defined and used, they must +%%% be handling in two different ways. +%%% +%%% Variables that are always defined in the receive (before branching +%%% out into the different clauses of the receive) and used after the +%%% receive, must be handled in the following way: Before each +%%% remove_message instruction, each such variable must be copied, and +%%% all variables must be consolidated using a phi node in the +%%% common exit block for the receive. +%%% +%%% Variables that are matched out and used in the same clause +%%% need copy instructions before the remove_message instruction +%%% in that clause. +%%% + +fix_receives(#st{ssa=Blocks0,cnt=Count0}=St) -> + {Blocks,Count} = fix_receives_1(maps:to_list(Blocks0), + Blocks0, Count0), + St#st{ssa=Blocks,cnt=Count}. + +fix_receives_1([{L,Blk}|Ls], Blocks0, Count0) -> + case Blk of + #b_blk{is=[#b_set{op=peek_message}|_]} -> + Rm = find_rm_blocks(L, Blocks0), + LoopExit = find_loop_exit(Rm, Blocks0), + Defs0 = beam_ssa:def([L], Blocks0), + CommonUsed = recv_common(Defs0, LoopExit, Blocks0), + {Blocks1,Count1} = recv_fix_common(CommonUsed, LoopExit, Rm, + Blocks0, Count0), + Defs = ordsets:subtract(Defs0, CommonUsed), + {Blocks,Count} = fix_receive(Rm, Defs, Blocks1, Count1), + fix_receives_1(Ls, Blocks, Count); + #b_blk{} -> + fix_receives_1(Ls, Blocks0, Count0) + end; +fix_receives_1([], Blocks, Count) -> + {Blocks,Count}. + +recv_common(_Defs, none, _Blocks) -> + %% There is no common exit block because receive is used + %% in the tail position of a function. + []; +recv_common(Defs, Exit, Blocks) -> + {ExitDefs,ExitUsed} = beam_ssa:def_used([Exit], Blocks), + Def = ordsets:subtract(Defs, ExitDefs), + ordsets:intersection(Def, ExitUsed). + +%% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel], +%% Blocks0, Count0) -> {Blocks,Count}. +%% Handle variables alwys defined in a receive and used +%% in the exit block following the receive. + +recv_fix_common([Msg0|T], Exit, Rm, Blocks0, Count0) -> + {Msg1,Count1} = new_var_name('@recv', Count0), + Msg = #b_var{name=Msg1}, + Blocks1 = beam_ssa:rename_vars(#{Msg0=>Msg}, [Exit], Blocks0), + N = length(Rm), + {MsgVars0,Count} = new_var_names(duplicate(N, '@recv'), Count1), + MsgVars = [#b_var{name=V} || V <- MsgVars0], + PhiArgs = fix_exit_phi_args(MsgVars, Rm, Exit, Blocks1), + Phi = #b_set{op=phi,dst=Msg,args=PhiArgs}, + ExitBlk0 = maps:get(Exit, Blocks1), + ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitBlk0#b_blk.is]}, + Blocks2 = Blocks1#{Exit:=ExitBlk}, + Blocks = recv_fix_common_1(MsgVars, Rm, Msg0, Blocks2), + recv_fix_common(T, Exit, Rm, Blocks, Count); +recv_fix_common([], _, _, Blocks, Count) -> + {Blocks,Count}. + +recv_fix_common_1([V|Vs], [Rm|Rms], Msg, Blocks0) -> + Ren = #{Msg=>V}, + Blocks1 = beam_ssa:rename_vars(Ren, [Rm], Blocks0), + #b_blk{is=Is0} = Blk0 = maps:get(Rm, Blocks1), + Copy = #b_set{op=copy,dst=V,args=[#b_var{name=Msg}]}, + Is = insert_after_phis(Is0, [Copy]), + Blk = Blk0#b_blk{is=Is}, + Blocks = Blocks1#{Rm:=Blk}, + recv_fix_common_1(Vs, Rms, Msg, Blocks); +recv_fix_common_1([], [], _Msg, Blocks) -> Blocks. + +fix_exit_phi_args([V|Vs], [Rm|Rms], Exit, Blocks) -> + Path = beam_ssa:rpo([Rm], Blocks), + Pred = exit_predecessor(Path, Exit), + [{V,Pred}|fix_exit_phi_args(Vs, Rms, Exit, Blocks)]; +fix_exit_phi_args([], [], _, _) -> []. + +exit_predecessor([Pred,Exit|_], Exit) -> + Pred; +exit_predecessor([_|Bs], Exit) -> + exit_predecessor(Bs, Exit). + +%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}. +%% Add a copy instruction for all variables that are matched out and +%% later used within a clause of the receive. + +fix_receive([L|Ls], Defs, Blocks0, Count0) -> + {RmDefs,Used0} = beam_ssa:def_used([L], Blocks0), + Def = ordsets:subtract(Defs, RmDefs), + Used = ordsets:intersection(Def, Used0), + {NewVs,Count} = new_var_names(Used, Count0), + NewVars = [#b_var{name=V} || V <- NewVs], + Ren = zip(Used, NewVars), + Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0), + #b_blk{is=Is0} = Blk1 = maps:get(L, Blocks1), + CopyIs = [#b_set{op=copy,dst=New,args=[#b_var{name=Old}]} || + {Old,New} <- Ren], + Is = insert_after_phis(Is0, CopyIs), + Blk = Blk1#b_blk{is=Is}, + Blocks = maps:put(L, Blk, Blocks1), + fix_receive(Ls, Defs, Blocks, Count); +fix_receive([], _Defs, Blocks, Count) -> + {Blocks,Count}. + +%% find_loop_exit([Label], Blocks) -> Label | none. +%% Find the block to which control is transferred when the +%% the receive loop is exited. + +find_loop_exit([L1,L2|_Ls], Blocks) -> + Path1 = beam_ssa:rpo([L1], Blocks), + Path2 = beam_ssa:rpo([L2], Blocks), + find_loop_exit_1(reverse(Path1), reverse(Path2), none); +find_loop_exit(_, _) -> none. + +find_loop_exit_1([H|T1], [H|T2], _) -> + find_loop_exit_1(T1, T2, H); +find_loop_exit_1(_, _, Exit) -> Exit. + +%% find_rm_blocks(StartLabel, Blocks) -> [Label]. +%% Find all blocks that start with remove_message within the receive +%% loop whose peek_message label is StartLabel. + +find_rm_blocks(L, Blocks) -> + Seen = gb_sets:singleton(L), + Blk = maps:get(L, Blocks), + Succ = beam_ssa:successors(Blk), + find_rm_blocks_1(Succ, Seen, Blocks). + +find_rm_blocks_1([L|Ls], Seen0, Blocks) -> + case gb_sets:is_member(L, Seen0) of + true -> + find_rm_blocks_1(Ls, Seen0, Blocks); + false -> + Seen = gb_sets:insert(L, Seen0), + Blk = maps:get(L, Blocks), + case find_rm_act(Blk#b_blk.is) of + prune -> + %% Looping back. Don't look at any successors. + find_rm_blocks_1(Ls, Seen, Blocks); + continue -> + %% Neutral block. Do nothing here, but look at + %% all successors. + Succ = beam_ssa:successors(Blk), + find_rm_blocks_1(Succ++Ls, Seen, Blocks); + found -> + %% Found remove_message instruction. + [L|find_rm_blocks_1(Ls, Seen, Blocks)] + end + end; +find_rm_blocks_1([], _, _) -> []. + +find_rm_act([#b_set{op=Op}|Is]) -> + case Op of + remove_message -> found; + peek_message -> prune; + recv_next -> prune; + wait_timeout -> prune; + wait -> prune; + _ -> find_rm_act(Is) + end; +find_rm_act([]) -> + continue. + +%%% +%%% Find out which variables need to be stored in Y registers. +%%% + +-record(dk, {d :: ordsets:ordset(var_name()), + k :: ordsets:ordset(var_name()) + }). + +%% find_yregs(St0) -> St. +%% Find all variables that must be stored in Y registers. Annotate +%% the blocks that allocate frames with the set of Y registers +%% used within that stack frame. +%% +%% Basically, we following all execution paths starting from a block +%% that allocates a frame, keeping track of of all defined registers +%% and all registers killed by an instruction that clobbers X +%% registers. For every use of a variable, we check if if it is in +%% the set of killed variables; if it is, it must be stored in an Y +%% register. + +find_yregs(#st{frames=[]}=St) -> + St; +find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) -> + FrameDefs = find_defs(Frames, Blocks0, [V || #b_var{name=V} <- Args]), + Blocks = find_yregs_1(FrameDefs, Blocks0), + St#st{ssa=Blocks}. + +find_yregs_1([{F,Defs}|Fs], Blocks0) -> + DK = #dk{d=Defs,k=[]}, + D0 = #{F=>DK}, + Ls = beam_ssa:rpo([F], Blocks0), + Yregs0 = [], + Yregs = find_yregs_2(Ls, Blocks0, D0, Yregs0), + Blk0 = maps:get(F, Blocks0), + Blk = beam_ssa:add_anno(yregs, Yregs, Blk0), + Blocks = Blocks0#{F:=Blk}, + find_yregs_1(Fs, Blocks); +find_yregs_1([], Blocks) -> Blocks. + +find_yregs_2([L|Ls], Blocks0, D0, Yregs0) -> + Blk0 = maps:get(L, Blocks0), + #b_blk{is=Is,last=Last} = Blk0, + Ys0 = maps:get(L, D0), + {Yregs1,Ys} = find_yregs_is(Is, Ys0, Yregs0), + Yregs = find_yregs_terminator(Last, Ys, Yregs1), + Successors = beam_ssa:successors(Blk0), + D = find_update_succ(Successors, Ys, D0), + find_yregs_2(Ls, Blocks0, D, Yregs); +find_yregs_2([], _Blocks, _D, Yregs) -> Yregs. + +find_defs(Frames, Blocks, Defs) -> + Seen = gb_sets:empty(), + FramesSet = gb_sets:from_list(Frames), + {FrameDefs,_} = find_defs_1([0], Blocks, FramesSet, Seen, Defs, []), + FrameDefs. + +find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) -> + case gb_sets:is_member(L, Frames) of + true -> + OrderedDefs = ordsets:from_list(Defs0), + find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, + [{L,OrderedDefs}|Acc0]); + false -> + case gb_sets:is_member(L, Seen0) of + true -> + find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, Acc0); + false -> + Seen1 = gb_sets:insert(L, Seen0), + {Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0), + #b_blk{is=Is} = Blk = maps:get(L, Blocks), + Defs = find_defs_is(Is, Defs0), + Successors = beam_ssa:successors(Blk), + find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc) + end + end; +find_defs_1([], _, _, Seen, _, Acc) -> + {Acc,Seen}. + +find_defs_is([#b_set{dst=#b_var{name=Dst}}|Is], Acc) -> + find_defs_is(Is, [Dst|Acc]); +find_defs_is([], Acc) -> Acc. + +find_update_succ([S|Ss], #dk{d=Defs0,k=Killed0}=DK0, D0) -> + case D0 of + #{S:=#dk{d=Defs1,k=Killed1}} -> + Defs = ordsets:intersection(Defs0, Defs1), + Killed = ordsets:union(Killed0, Killed1), + DK = #dk{d=Defs,k=Killed}, + D = maps:put(S, DK, D0), + find_update_succ(Ss, DK0, D); + #{} -> + D = maps:put(S, DK0, D0), + find_update_succ(Ss, DK0, D) + end; +find_update_succ([], _, D) -> D. + +find_yregs_is([#b_set{dst=#b_var{name=Dst}}=I|Is], #dk{d=Defs0,k=Killed0}=Ys, Yregs0) -> + Used = beam_ssa:used(I), + Yregs1 = ordsets:intersection(Used, Killed0), + Yregs = ordsets:union(Yregs0, Yregs1), + case beam_ssa:clobbers_xregs(I) of + false -> + Defs = ordsets:add_element(Dst, Defs0), + find_yregs_is(Is, Ys#dk{d=Defs}, Yregs); + true -> + Killed = ordsets:union(Defs0, Killed0), + Defs = [Dst], + find_yregs_is(Is, Ys#dk{d=Defs,k=Killed}, Yregs) + end; +find_yregs_is([], Ys, Yregs) -> {Yregs,Ys}. + +find_yregs_terminator(Terminator, #dk{k=Killed}, Yregs0) -> + Used = beam_ssa:used(Terminator), + Yregs = ordsets:intersection(Used, Killed), + ordsets:union(Yregs0, Yregs). + +%%% +%%% Try to reduce the size of the stack frame, by adding an explicit +%%% 'copy' instructions for return values from 'call' and 'make_fun' that +%%% need to be saved in Y registers. Here is an example to show +%%% how that's useful. First, here is the Erlang code: +%%% +%%% f(Pid) -> +%%% Res = foo(42), +%%% _ = node(Pid), +%%% bar(), +%%% Res. +%%% +%%% Compiled to SSA format, the main part of the code looks like this: +%%% +%%% 0: +%%% Res = call local literal foo/1, literal 42 +%%% _1 = bif:node Pid +%%% @ssa_bool = succeeded _1 +%%% br @ssa_bool, label 3, label 1 +%%% 3: +%%% @ssa_ignored = call local literal bar/0 +%%% ret Res +%%% +%%% It can be seen that the variables Pid and Res must be saved in Y +%%% registers in order to survive the function calls. A previous sub +%%% pass has inserted a 'copy' instruction to save the value of the +%%% variable Pid: +%%% +%%% 0: +%%% Pid:4 = copy Pid +%%% Res = call local literal foo/1, literal 42 +%%% _1 = bif:node Pid:4 +%%% @ssa_bool = succeeded _1 +%%% br @ssa_bool, label 3, label 1 +%%% +%%% 3: +%%% @ssa_ignored = call local literal bar/0 +%%% ret Res +%%% +%%% The Res and Pid:4 variables must be assigned to different Y registers +%%% because they are live at the same time. copy_retval() inserts a +%%% 'copy' instruction to copy Res to a new variable: +%%% +%%% 0: +%%% Pid:4 = copy Pid +%%% Res:6 = call local literal foo/1, literal 42 +%%% _1 = bif:node Pid:4 +%%% @ssa_bool = succeeded _1 +%%% br @ssa_bool, label 3, label 1 +%%% +%%% 3: +%%% Res = copy Res:6 +%%% @ssa_ignored = call local literal bar/0 +%%% ret Res +%%% +%%% The new variable Res:6 is used to capture the return value from the call. +%%% The variables Pid:4 and Res are no longer live at the same time, so they +%%% can be assigned to the same Y register. +%%% + +copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Count0}=St) -> + {Blocks,Count} = copy_retval_1(Frames, Blocks0, Count0), + St#st{ssa=Blocks,cnt=Count}. + +copy_retval_1([F|Fs], Blocks0, Count0) -> + #b_blk{anno=#{yregs:=Yregs0},is=Is} = maps:get(F, Blocks0), + Yregs1 = gb_sets:from_list(Yregs0), + Yregs = collect_yregs(Is, Yregs1), + Ls = beam_ssa:rpo([F], Blocks0), + {Blocks,Count} = copy_retval_2(Ls, Yregs, none, Blocks0, Count0), + copy_retval_1(Fs, Blocks, Count); +copy_retval_1([], Blocks, Count) -> + {Blocks,Count}. + +collect_yregs([#b_set{op=copy,dst=#b_var{name=Y},args=[#b_var{name=X}]}|Is], + Yregs0) -> + true = gb_sets:is_member(X, Yregs0), %Assertion. + Yregs = gb_sets:insert(Y, gb_sets:delete(X, Yregs0)), + collect_yregs(Is, Yregs); +collect_yregs([#b_set{}|Is], Yregs) -> + collect_yregs(Is, Yregs); +collect_yregs([], Yregs) -> Yregs. + +copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) -> + #b_blk{is=Is0,last=Last} = Blk = maps:get(L, Blocks0), + RC = case {Last,Ls} of + {#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} -> + true; + {_,_} -> + false + end, + case copy_retval_is(Is0, RC, Yregs, Copy0, Count0, []) of + {Is,Count} -> + case Copy0 =:= none andalso Count0 =:= Count of + true -> + copy_retval_2(Ls, Yregs, none, Blocks0, Count0); + false -> + Blocks = Blocks0#{L=>Blk#b_blk{is=Is}}, + copy_retval_2(Ls, Yregs, none, Blocks, Count) + end; + {Is,Count,Copy} -> + Blocks = Blocks0#{L=>Blk#b_blk{is=Is}}, + copy_retval_2(Ls, Yregs, Copy, Blocks, Count) + end; +copy_retval_2([], _Yregs, none, Blocks, Count) -> + {Blocks,Count}. + +copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs, + Copy, Count, Acc) -> + I = I0#b_set{args=copy_sub_args(Args0, Copy)}, + {reverse(Acc, [I|acc_copy([], Copy)]),Count}; +copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) -> + {reverse(Acc, acc_copy(Is, Copy)),Count}; +copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) -> + {reverse(Acc, acc_copy(Is, Copy)),Count}; +copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetVal}=Dst}=I0|Is], RC, Yregs, + Copy0, Count0, Acc0) when Op =:= call; Op =:= make_fun -> + {I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0), + case gb_sets:is_member(RetVal, Yregs) of + true -> + {NewVarName,Count} = new_var_name(RetVal, Count1), + NewVar = #b_var{name=NewVarName}, + Copy = #b_set{op=copy,dst=Dst,args=[NewVar]}, + I = I1#b_set{dst=NewVar}, + copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]); + false -> + copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc]) + end; +copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) -> + I = I0#b_set{args=copy_sub_args(Args0, Copy)}, + case beam_ssa:clobbers_xregs(I) of + true -> + copy_retval_is(Is, RC, Yregs, none, Count, [I|acc_copy(Acc, Copy)]); + false -> + copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]) + end; +copy_retval_is([], RC, _, Copy, Count, Acc) -> + case {Copy,RC} of + {none,_} -> + {reverse(Acc),Count}; + {#b_set{},true} -> + {reverse(Acc),Count,Copy}; + {#b_set{},false} -> + {reverse(Acc, [Copy]),Count} + end. + +%% +%% Consider this code: +%% +%% Var = ... +%% ... +%% A1 = call foo/0 +%% A = copy A1 +%% B = call bar/1, Var +%% +%% If the Var variable is no longer used after this code, its Y register +%% can't be reused for A. To allow the Y register to be reused +%% we will need to insert 'copy' instructions for arguments that are +%% in Y registers: +%% +%% Var = ... +%% ... +%% A1 = call foo/0 +%% Var1 = copy Var +%% A = copy A1 +%% B = call bar/1, Var1 +%% + +place_retval_copy(I, _Yregs, none, Count, Acc) -> + {I,Count,Acc}; +place_retval_copy(#b_set{args=[F|Args0]}=I, Yregs, Copy, Count0, Acc0) -> + #b_set{dst=#b_var{name=Avoid}} = Copy, + {Args,Acc1,Count} = copy_func_args(Args0, Yregs, Avoid, Acc0, [], Count0), + Acc = [Copy|Acc1], + {I#b_set{args=[F|Args]},Count,Acc}. + +copy_func_args([#b_var{name=V}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) -> + case gb_sets:is_member(V, Yregs) of + true when V =/= Avoid -> + {NewVarName,Count} = new_var_name(V, Count0), + NewVar = #b_var{name=NewVarName}, + Copy = #b_set{op=copy,dst=NewVar,args=[A]}, + copy_func_args(As, Yregs, Avoid, [Copy|CopyAcc], [NewVar|Acc], Count); + _ -> + copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count0) + end; +copy_func_args([A|As], Yregs, Avoid, CopyAcc, Acc, Count) -> + copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count); +copy_func_args([], _Yregs, _Avoid, CopyAcc, Acc, Count) -> + {reverse(Acc),CopyAcc,Count}. + +acc_copy(Acc, none) -> Acc; +acc_copy(Acc, #b_set{}=Copy) -> [Copy|Acc]. + +copy_sub_args(Args, none) -> + Args; +copy_sub_args(Args, #b_set{dst=Dst,args=[Src]}) -> + [sub_arg(A, Dst, Src) || A <- Args]. + +sub_arg(Old, Old, New) -> New; +sub_arg(Old, _, _) -> Old. + +%%% +%%% Consider: +%%% +%%% x1/Hd = get_hd x0/Cons +%%% y0/Tl = get_tl x0/Cons +%%% +%%% Register x0 can't be reused for Hd. If Hd needs to be in x0, +%%% a 'move' instruction must be inserted. +%%% +%%% If we swap get_hd and get_tl when Tl is in a Y register, +%%% x0 can be used for Hd if Cons is not used again: +%%% +%%% y0/Tl = get_tl x0/Cons +%%% x0/Hd = get_hd x0/Cons +%%% + +opt_get_list(#st{ssa=Blocks,res=Res}=St) -> + ResMap = maps:from_list(Res), + Ls = beam_ssa:rpo(Blocks), + St#st{ssa=opt_get_list_1(Ls, ResMap, Blocks)}. + +opt_get_list_1([L|Ls], Res, Blocks0) -> + #b_blk{is=Is0} = Blk = maps:get(L, Blocks0), + case opt_get_list_is(Is0, Res, [], false) of + no -> + opt_get_list_1(Ls, Res, Blocks0); + {yes,Is} -> + Blocks = Blocks0#{L:=Blk#b_blk{is=Is}}, + opt_get_list_1(Ls, Res, Blocks) + end; +opt_get_list_1([], _, Blocks) -> Blocks. + +opt_get_list_is([#b_set{op=get_hd,dst=#b_var{name=Hd}, + args=[Cons]}=GetHd, + #b_set{op=get_tl,dst=#b_var{name=Tl}, + args=[Cons]}=GetTl|Is], + Res, Acc, Changed) -> + %% Note that when this pass is run, only Y registers have + %% reservations. The absence of an entry for a variable therefore + %% means that the variable will be in an X register. + case Res of + #{Hd:={y,_}} -> + %% Hd will be in a Y register. Don't swap. + opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed); + #{Tl:={y,_}} -> + %% Tl will be in a Y register. Swap. + opt_get_list_is([GetHd|Is], Res, [GetTl|Acc], true); + #{} -> + %% Both are in X registers. Nothing to do. + opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed) + end; +opt_get_list_is([I|Is], Res, Acc, Changed) -> + opt_get_list_is(Is, Res, [I|Acc], Changed); +opt_get_list_is([], _Res, Acc, Changed) -> + case Changed of + true -> + {yes,reverse(Acc)}; + false -> + no + end. + +%%% +%%% Number instructions in the order they are executed. +%%% + +%% number_instructions(St0) -> St. +%% Number instructions in the order they are executed. Use a step +%% size of 2. Don't number phi instructions. All phi variables in +%% a block will be live one unit before the first non-phi instruction +%% in the block. + +number_instructions(#st{ssa=Blocks0}=St) -> + Ls = beam_ssa:rpo(Blocks0), + St#st{ssa=number_is_1(Ls, 1, Blocks0)}. + +number_is_1([L|Ls], N0, Blocks0) -> + #b_blk{is=Is0,last=Last0} = Bl0 = maps:get(L, Blocks0), + {Is,N1} = number_is_2(Is0, N0, []), + Last = beam_ssa:add_anno(n, N1, Last0), + N = N1 + 2, + Bl = Bl0#b_blk{is=Is,last=Last}, + Blocks = maps:put(L, Bl, Blocks0), + number_is_1(Ls, N, Blocks); +number_is_1([], _, Blocks) -> Blocks. + +number_is_2([#b_set{op=phi}=I|Is], N, Acc) -> + number_is_2(Is, N, [I|Acc]); +number_is_2([I0|Is], N, Acc) -> + I = beam_ssa:add_anno(n, N, I0), + number_is_2(Is, N+2, [I|Acc]); +number_is_2([], N, Acc) -> + {reverse(Acc),N}. + +%%% +%%% Calculate live intervals. +%%% + +live_intervals(#st{args=Args,ssa=Blocks}=St) -> + Vars0 = [{V,{0,1}} || #b_var{name=V} <- Args], + F = fun(L, _, A) -> live_interval_blk(L, Blocks, A) end, + LiveMap0 = #{}, + Acc0 = {[],[],LiveMap0}, + {Vars,Aliases,_} = beam_ssa:fold_po(F, Acc0, Blocks), + Intervals = merge_ranges(rel2fam(Vars0++Vars)), + St#st{intervals=Intervals,aliases=Aliases}. + +merge_ranges([{V,Rs}|T]) -> + [{V,merge_ranges_1(Rs)}|merge_ranges(T)]; +merge_ranges([]) -> []. + +merge_ranges_1([{A,N},{N,Z}|Rs]) -> + merge_ranges_1([{A,Z}|Rs]); +merge_ranges_1([R|Rs]) -> + [R|merge_ranges_1(Rs)]; +merge_ranges_1([]) -> []. + +live_interval_blk(L, Blocks, {Vars0,Aliases0,LiveMap0}) -> + Live0 = [], + Successors = beam_ssa:successors(L, Blocks), + Live1 = update_successors(Successors, L, Blocks, LiveMap0, Live0), + + %% Add ranges for all variables that are live in the successors. + #b_blk{is=Is,last=Last} = maps:get(L, Blocks), + End = beam_ssa:get_anno(n, Last), + Use = [{V,{use,End+1}} || V <- Live1], + + %% Determine used and defined variables in this block. + FirstNumber = first_number(Is, Last), + {UseDef0,Aliases} = live_interval_blk_1([Last|reverse(Is)], + FirstNumber, Aliases0, Use), + UseDef = rel2fam(UseDef0), + + %% Update what is live at the beginning of this block and + %% store it. + Used = [V || {V,[{use,_}|_]} <- UseDef], + Live2 = ordsets:union(Live1, Used), + Killed = [V || {V,[{def,_}|_]} <- UseDef], + Live = ordsets:subtract(Live2, Killed), + LiveMap = LiveMap0#{L=>Live}, + + %% Construct the ranges for this block. + Vars = make_block_ranges(UseDef, FirstNumber, Vars0), + {Vars,Aliases,LiveMap}. + +make_block_ranges([{V,[{def,Def}]}|Vs], First, Acc) -> + make_block_ranges(Vs, First, [{V,{Def,Def}}|Acc]); +make_block_ranges([{V,[{def,Def}|Uses]}|Vs], First, Acc) -> + {use,Last} = last(Uses), + make_block_ranges(Vs, First, [{V,{Def,Last}}|Acc]); +make_block_ranges([{V,[{use,_}|_]=Uses}|Vs], First, Acc) -> + {use,Last} = last(Uses), + make_block_ranges(Vs, First, [{V,{First,Last}}|Acc]); +make_block_ranges([], _, Acc) -> Acc. + +live_interval_blk_1([#b_set{op=phi,dst=#b_var{name=Dst}}|Is], + FirstNumber, Aliases, Acc0) -> + Acc = [{Dst,{def,FirstNumber}}|Acc0], + live_interval_blk_1(Is, FirstNumber, Aliases, Acc); +live_interval_blk_1([#b_set{op=bs_start_match}=I|Is], FirstNumber, + Aliases0, Acc0) -> + N = beam_ssa:get_anno(n, I), + #b_set{dst=#b_var{name=Dst}} = I, + Acc1 = [{Dst,{def,N}}|Acc0], + Aliases = case beam_ssa:get_anno(reuse_for_context, I) of + true -> + #b_set{args=[#b_var{name=Src}]} = I, + [{Dst,Src}|Aliases0]; + false -> + Aliases0 + end, + Acc = [{V,{use,N}} || V <- beam_ssa:used(I)] ++ Acc1, + live_interval_blk_1(Is, FirstNumber, Aliases, Acc); +live_interval_blk_1([I|Is], FirstNumber, Aliases, Acc0) -> + N = beam_ssa:get_anno(n, I), + Acc1 = case I of + #b_set{dst=#b_var{name=Dst}} -> + [{Dst,{def,N}}|Acc0]; + _ -> + Acc0 + end, + Used = beam_ssa:used(I), + Acc = [{V,{use,N}} || V <- Used] ++ Acc1, + live_interval_blk_1(Is, FirstNumber, Aliases, Acc); +live_interval_blk_1([], _FirstNumber, Aliases, Acc) -> + {Acc,Aliases}. + +%% first_number([#b_set{}]) -> InstructionNumber. +%% Return the number for the first instruction for the block. +%% Note that this number is one less than the first +%% non-phi instruction in the block. + +first_number([#b_set{op=phi}|Is], Last) -> + first_number(Is, Last); +first_number([I|_], _) -> + beam_ssa:get_anno(n, I) - 1; +first_number([], Last) -> + beam_ssa:get_anno(n, Last) - 1. + +update_successors([L|Ls], Pred, Blocks, LiveMap, Live0) -> + Live1 = ordsets:union(Live0, get_live(L, LiveMap)), + #b_blk{is=Is} = maps:get(L, Blocks), + Live = update_live_phis(Is, Pred, Live1), + update_successors(Ls, Pred, Blocks, LiveMap, Live); +update_successors([], _, _, _, Live) -> Live. + +get_live(L, LiveMap) -> + case LiveMap of + #{L:=Live} -> Live; + #{} -> [] + end. + +update_live_phis([#b_set{op=phi,dst=#b_var{name=Killed},args=Args}|Is], + Pred, Live0) -> + Used = [V || {#b_var{name=V},L} <- Args, L =:= Pred], + Live1 = ordsets:union(ordsets:from_list(Used), Live0), + Live = ordsets:del_element(Killed, Live1), + update_live_phis(Is, Pred, Live); +update_live_phis(_, _, Live) -> Live. + +%%% +%%% Reserve Y registers. +%%% + +%% reserve_yregs(St0) -> St. +%% In each block that allocates a stack frame, insert instructions +%% that copy variables that must be in Y registers (given by +%% YRegisters) to new variables. +%% +%% Also allocate specific Y registers for try and catch tags. +%% The outermost try/catch tag is placed in y0, any directly +%% nested tag in y1, and so on. Note that this is the reversed +%% order as required by BEAM; it will be corrected later by +%% turn_yregs(). + +reserve_yregs(#st{frames=Frames}=St0) -> + foldl(fun reserve_yregs_1/2, St0, Frames). + +reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) -> + Blk = maps:get(L, Blocks0), + Yregs = beam_ssa:get_anno(yregs, Blk), + {Def,Used} = beam_ssa:def_used([L], Blocks0), + UsedYregs = ordsets:intersection(Yregs, Used), + DefBefore = ordsets:subtract(UsedYregs, Def), + {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, Blocks0, Count0), + InsideVars = ordsets:subtract(UsedYregs, DefBefore), + ResTryTags0 = reserve_try_tags(L, Blocks), + ResTryTags = [{V,{Reg,Count}} || {V,Reg} <- ResTryTags0], + Vars = BeforeVars ++ InsideVars, + Res = [{V,{y,Count}} || V <- Vars] ++ ResTryTags ++ Res0, + St#st{res=Res,ssa=Blocks,cnt=Count+1}. + +reserve_try_tags(L, Blocks) -> + Seen = gb_sets:empty(), + {Res0,_} = reserve_try_tags_1([L], Blocks, Seen, #{}), + Res1 = [maps:to_list(M) || {_,M} <- maps:to_list(Res0)], + Res = [{V,{y,Y}} || {V,Y} <- append(Res1)], + ordsets:from_list(Res). + +reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) -> + case gb_sets:is_element(L, Seen0) of + true -> + reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0); + false -> + Seen1 = gb_sets:insert(L, Seen0), + #b_blk{is=Is} = Blk = maps:get(L, Blocks), + Active0 = get_active(L, ActMap0), + Active = reserve_try_tags_is(Is, Active0), + Successors = beam_ssa:successors(Blk), + ActMap1 = update_act_map(Successors, Active, ActMap0), + {ActMap,Seen} = reserve_try_tags_1(Ls, Blocks, Seen1, ActMap1), + reserve_try_tags_1(Successors, Blocks, Seen,ActMap) + end; +reserve_try_tags_1([], _Blocks, Seen, ActMap) -> + {ActMap,Seen}. + +get_active(L, ActMap) -> + case ActMap of + #{L:=Active} -> Active; + #{} -> #{} + end. + +reserve_try_tags_is([#b_set{op=new_try_tag,dst=#b_var{name=V}}|Is], Active) -> + N = map_size(Active), + reserve_try_tags_is(Is, Active#{V=>N}); +reserve_try_tags_is([#b_set{op=kill_try_tag,args=[#b_var{name=Tag}]}|Is], Active) -> + reserve_try_tags_is(Is, maps:remove(Tag, Active)); +reserve_try_tags_is([_|Is], Active) -> + reserve_try_tags_is(Is, Active); +reserve_try_tags_is([], Active) -> Active. + +update_act_map([L|Ls], Active0, ActMap0) -> + case ActMap0 of + #{L:=Active1} -> + ActMap = ActMap0#{L=>maps:merge(Active0, Active1)}, + update_act_map(Ls, Active0, ActMap); + #{} -> + ActMap = ActMap0#{L=>Active0}, + update_act_map(Ls, Active0, ActMap) + end; +update_act_map([], _, ActMap) -> ActMap. + +rename_vars([], _, Blocks, Count) -> + {[],Blocks,Count}; +rename_vars(Vs, L, Blocks0, Count0) -> + {NewVs,Count} = new_var_names(Vs, Count0), + NewVars = [#b_var{name=V} || V <- NewVs], + Ren = zip(Vs, NewVars), + Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0), + #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks1), + CopyIs = [#b_set{op=copy,dst=New,args=[#b_var{name=Old}]} || + {Old,New} <- Ren], + Is = insert_after_phis(Is0, CopyIs), + Blk = Blk0#b_blk{is=Is}, + Blocks = maps:put(L, Blk, Blocks1), + {NewVs,Blocks,Count}. + +insert_after_phis([#b_set{op=phi}=I|Is], InsertIs) -> + [I|insert_after_phis(Is, InsertIs)]; +insert_after_phis(Is, InsertIs) -> + InsertIs ++ Is. + +%% frame_size(St0) -> St. +%% Calculate the frame size for each block that allocates a frame. +%% Annotate the block with the frame size. Also annotate all +%% return instructions with {deallocate,FrameSize} to simplify +%% code generation. + +frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) -> + Blocks = foldl(fun(L, Blks) -> + frame_size_1(L, Regs, Blks) + end, Blocks0, Frames), + St#st{ssa=Blocks}. + +frame_size_1(L, Regs, Blocks0) -> + Def = beam_ssa:def([L], Blocks0), + Yregs0 = [maps:get(V, Regs) || V <- Def, is_yreg(maps:get(V, Regs))], + Yregs = ordsets:from_list(Yregs0), + FrameSize = length(ordsets:from_list(Yregs)), + if + FrameSize =/= 0 -> + [{y,0}|_] = Yregs, %Assertion. + {y,Last} = last(Yregs), + Last = FrameSize - 1, %Assertion. + ok; + true -> + ok + end, + Blk0 = maps:get(L, Blocks0), + Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0), + + %% Insert an annotation for frame deallocation on + %% each #b_ret{}. + Blocks = maps:put(L, Blk, Blocks0), + Reachable = beam_ssa:rpo([L], Blocks), + frame_deallocate(Reachable, FrameSize, Blocks). + +frame_deallocate([L|Ls], Size, Blocks0) -> + Blk0 = maps:get(L, Blocks0), + Blk = case Blk0 of + #b_blk{last=#b_ret{}=Ret0} -> + Ret = beam_ssa:add_anno(deallocate, Size, Ret0), + Blk0#b_blk{last=Ret}; + #b_blk{} -> + Blk0 + end, + Blocks = maps:put(L, Blk, Blocks0), + frame_deallocate(Ls, Size, Blocks); +frame_deallocate([], _, Blocks) -> Blocks. + + +%% turn_yregs(St0) -> St. +%% Renumber y registers so that {y,0} becomes {y,FrameSize-1}, +%% {y,FrameSize-1} becomes {y,0} and so on. This is to make nested +%% catches work. The register allocator (linear_scan()) has given +%% a lower number to the outermost catch. + +turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) -> + Regs1 = foldl(fun(L, A) -> + Blk = maps:get(L, Blocks), + FrameSize = beam_ssa:get_anno(frame_size, Blk), + Def = beam_ssa:def([L], Blocks), + [turn_yregs_1(Def, FrameSize, Regs0)|A] + end, [], Frames), + Regs = maps:merge(Regs0, maps:from_list(append(Regs1))), + St#st{regs=Regs}. + +turn_yregs_1(Def, FrameSize, Regs) -> + Yregs0 = [{maps:get(V, Regs),V} || V <- Def, is_yreg(maps:get(V, Regs))], + Yregs1 = rel2fam(Yregs0), + FrameSize = length(Yregs1), + Yregs2 = [{{y,FrameSize-Y-1},Vs} || {{y,Y},Vs} <- Yregs1], + R0 = sofs:family(Yregs2), + R1 = sofs:family_to_relation(R0), + R = sofs:converse(R1), + sofs:to_external(R). + +%%% +%%% Reserving registers before register allocation. +%%% + +%% reserve_regs(St0) -> St. +%% Reserve registers prior to register allocation. Y registers +%% have already been reserved. This function will reserve z, +%% fr, and specific x registers. + +reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) -> + %% Reserve x0, x1, and so on for the function arguments. + Res1 = reserve_arg_regs(Args, 0, Res0), + + %% Reserve Z registers (dummy registers) for instructions with no + %% return values (e.g. remove_message) or pseudo-return values + %% (e.g. landingpad). + Res2 = reserve_zregs(Blocks, Intervals, Res1), + + %% Reserve float registers. + Res3 = reserve_fregs(Blocks, Res2), + + %% Reserve all remaining unreserved variables as X registers. + Res = maps:from_list(Res3), + St#st{res=reserve_xregs(Blocks, Res)}. + +reserve_arg_regs([#b_var{name=Arg}|Is], N, Acc) -> + reserve_arg_regs(Is, N+1, [{Arg,{x,N}}|Acc]); +reserve_arg_regs([], _, Acc) -> Acc. + +reserve_zregs(Blocks, Intervals, Res) -> + ShortLived0 = [V || {V,[{Start,End}]} <- Intervals, Start+2 =:= End], + ShortLived = cerl_sets:from_list(ShortLived0), + F = fun(_, #b_blk{is=Is,last=Last}, A) -> + reserve_zreg(Is, Last, ShortLived, A) + end, + beam_ssa:fold_rpo(F, [0], Res, Blocks). + +reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}, + #b_set{op={bif,'=:='},args=[Dst,Val]}], _Last, ShortLived, A0) -> + case Val of + #b_literal{val=Arity} when Arity bsr 32 =:= 0 -> + %% These two instructions can be combined to a test_arity + %% instruction provided that the arity variable is short-lived. + reserve_zreg_1(Dst, ShortLived, A0); + _ -> + A0 + end; +reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}], + #b_switch{}, ShortLived, A) -> + reserve_zreg_1(Dst, ShortLived, A); +reserve_zreg([#b_set{op=Op,dst=#b_var{name=Dst}}|Is], Last, ShortLived, A0) -> + IsZReg = case Op of + context_to_binary -> true; + bs_match_string -> true; + bs_restore -> true; + bs_save -> true; + {float,clearerror} -> true; + kill_try_tag -> true; + landingpad -> true; + put_tuple_elements -> true; + remove_message -> true; + set_tuple_element -> true; + succeeded -> true; + timeout -> true; + wait_timeout -> true; + _ -> false + end, + A = case IsZReg of + true -> [{Dst,z}|A0]; + false -> A0 + end, + reserve_zreg(Is, Last, ShortLived, A); +reserve_zreg([], #b_br{bool=Bool}, ShortLived, A) -> + reserve_zreg_1(Bool, ShortLived, A); +reserve_zreg([], _, _, A) -> A. + +reserve_zreg_1(#b_var{name=V}, ShortLived, A) -> + case cerl_sets:is_element(V, ShortLived) of + true -> [{V,z}|A]; + false -> A + end; +reserve_zreg_1(#b_literal{}, _, A) -> A. + +reserve_fregs(Blocks, Res) -> + F = fun(_, #b_blk{is=Is}, A) -> + reserve_freg(Is, A) + end, + beam_ssa:fold_rpo(F, [0], Res, Blocks). + +reserve_freg([#b_set{op={float,Op},dst=#b_var{name=V}}|Is], Res) -> + case Op of + get -> + reserve_freg(Is, Res); + _ -> + reserve_freg(Is, [{V,fr}|Res]) + end; +reserve_freg([_|Is], Res) -> + reserve_freg(Is, Res); +reserve_freg([], Res) -> Res. + +%% reserve_xregs(St0) -> St. +%% Reserve all remaining variables as X registers. +%% +%% If a variable will need to be in a specific X register for a +%% 'call' or 'make_fun' (and there is nothing that will kill it +%% between the definition and use), reserve the register using a +%% {prefer,{x,X} annotation. That annotation means that the linear +%% scan algorithm will place the variable in the preferred register, +%% unless that register is already occupied. +%% +%% All remaining variables are reserved as X registers. Linear scan +%% will allocate the lowest free X register for the variable. + +reserve_xregs(Blocks, Res) -> + F = fun(L, #b_blk{is=Is,last=Last}, R) -> + {Xs0,Used0} = reserve_terminator(L, Last, Blocks, R), + reserve_xregs_is(reverse(Is), R, Xs0, Used0) + end, + beam_ssa:fold_po(F, Res, Blocks). + +reserve_xregs_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args}=I|Is], Res0, Xs0, Used0) -> + Xs1 = case is_gc_safe(I) of + true -> + Xs0; + false -> + %% There may be a garbage collection after executing this + %% instruction. We will need prune the list of preferred + %% X registers. + res_xregs_prune(Xs0, Used0, Res0) + end, + Res = reserve_xreg(Dst, Xs1, Res0), + Used1 = ordsets:union(Used0, beam_ssa:used(I)), + Used = ordsets:del_element(Dst, Used1), + case Op of + call -> + Xs = reserve_call_args(tl(Args)), + reserve_xregs_is(Is, Res, Xs, Used); + make_fun -> + Xs = reserve_call_args(tl(Args)), + reserve_xregs_is(Is, Res, Xs, Used); + _ -> + reserve_xregs_is(Is, Res, Xs1, Used) + end; +reserve_xregs_is([], Res, _Xs, _Used) -> Res. + +reserve_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}, Blocks, Res) -> + case maps:get(Succ, Blocks) of + #b_blk{is=[],last=Last} -> + reserve_terminator(Succ, Last, Blocks, Res); + #b_blk{is=[_|_]=Is} -> + {res_xregs_from_phi(Is, L, Res, #{}),[]} + end; +reserve_terminator(_, Last, _, _) -> + {#{},beam_ssa:used(Last)}. + +res_xregs_from_phi([#b_set{op=phi,dst=#b_var{name=Dst},args=Args}|Is], + Pred, Res, Acc) -> + case [V || {#b_var{name=V},L} <- Args, L =:= Pred] of + [] -> + res_xregs_from_phi(Is, Pred, Res, Acc); + [V] -> + case Res of + #{Dst:={prefer,Reg}} -> + res_xregs_from_phi(Is, Pred, Res, Acc#{V=>Reg}); + #{Dst:=_} -> + res_xregs_from_phi(Is, Pred, Res, Acc) + end + end; +res_xregs_from_phi(_, _, _, Acc) -> Acc. + +reserve_call_args(Args) -> + reserve_call_args(Args, 0, #{}). + +reserve_call_args([#b_var{name=Name}|As], X, Xs) -> + reserve_call_args(As, X+1, Xs#{Name=>{x,X}}); +reserve_call_args([#b_literal{}|As], X, Xs) -> + reserve_call_args(As, X+1, Xs); +reserve_call_args([], _, Xs) -> Xs. + +reserve_xreg(V, Xs, Res) -> + case Res of + #{V:=_} -> + %% Already reserved. + Res; + #{} -> + case Xs of + #{V:=X} -> + %% Add a hint that a specific X register is + %% preferred, unless it is already in use. + Res#{V=>{prefer,X}}; + #{} -> + %% Reserve as an X register in general. + Res#{V=>x} + end + end. + +is_gc_safe(#b_set{op=phi}) -> + false; +is_gc_safe(#b_set{op=Op,args=Args}) -> + case beam_ssa_codegen:classify_heap_need(Op, Args) of + neutral -> true; + {put,_} -> true; + _ -> false + end. + +%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs. +%% Prune the list of preferred to only include X registers that +%% are guaranteed to survice a garbage collection. + +res_xregs_prune(Xs, Used, Res) -> + %% The number of safe registers is the number of the X registers + %% used after this point. The actual number of safe registers may + %% be highter than this number, but this is a conservative safe + %% estimate. + NumSafe = foldl(fun(V, N) -> + case Res of + #{V:={x,_}} -> N + 1; + #{V:=_} -> N; + #{} -> N + 1 + end + end, 0, Used), + + %% Remove unsafe registers from the list of potential + %% preferred registers. + maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs). + +%%% +%%% Remove unsuitable aliases. +%%% +%%% If a binary is matched more than once, we must not put the +%%% the match context in the same register as the binary to +%%% avoid the following situation: +%%% +%%% {test,bs_start_match2,{f,3},1,[{x,0},0],{x,0}}. +%%% . +%%% . +%%% . +%%% {test,bs_start_match2,{f,6},1,[{x,0},0],{x,1}}. %% ILLEGAL! +%%% +%%% The second instruction is illegal because a match context source +%%% is only allowed if source and destination registers are identical. +%%% + +remove_unsuitable_aliases(#st{aliases=[_|_]=Aliases0,ssa=Blocks}=St) -> + R = rem_unsuitable(maps:values(Blocks)), + Unsuitable0 = [V || {V,[_,_|_]} <- rel2fam(R)], + Unsuitable = gb_sets:from_list(Unsuitable0), + Aliases =[P || {_,V}=P <- Aliases0, + not gb_sets:is_member(V, Unsuitable)], + St#st{aliases=Aliases}; +remove_unsuitable_aliases(#st{aliases=[]}=St) -> St. + +rem_unsuitable([#b_blk{is=Is}|Bs]) -> + Vs = [{V,Dst} || + #b_set{op=bs_start_match,dst=#b_var{name=Dst}, + args=[#b_var{name=V}]} <- Is], + Vs ++ rem_unsuitable(Bs); +rem_unsuitable([]) -> []. + +%%% +%%% Merge intervals. +%%% + +merge_intervals(#st{aliases=Aliases0,intervals=Intervals0, + res=Reserved}=St) -> + Aliases1 = [A || A <- Aliases0, + is_suitable_alias(A, Reserved)], + case Aliases1 of + [] -> + St#st{aliases=Aliases1}; + [_|_] -> + Intervals1 = maps:from_list(Intervals0), + {Intervals,Aliases} = + merge_intervals_1(Aliases1, Intervals1, []), + St#st{aliases=Aliases,intervals=Intervals} + end. + +merge_intervals_1([{Alias,V}|Vs], Intervals0, Acc) -> + #{Alias:=Int1,V:=Int2} = Intervals0, + Int3 = lists:merge(Int1, Int2), + Int = merge_intervals_2(Int3), + Intervals1 = maps:remove(Alias, Intervals0), + Intervals = Intervals1#{V:=Int}, + merge_intervals_1(Vs, Intervals, [{Alias,V}|Acc]); +merge_intervals_1([], Intervals, Acc) -> + {maps:to_list(Intervals),Acc}. + +merge_intervals_2([{A1,B1},{A2,B2}|Is]) when A2 =< B1 -> + merge_intervals_2([{min(A1, A2),max(B1, B2)}|Is]); +merge_intervals_2([{_A1,B1}=R|[{A2,_B2}|_]=Is]) when B1 < A2 -> + [R|merge_intervals_2(Is)]; +merge_intervals_2([_]=Is) -> Is. + +is_suitable_alias({V1,V2}, Reserved) -> + #{V1:=Res1,V2:=Res2} = Reserved, + case {Res1,Res2} of + {x,x} -> true; + {x,{x,_}} -> true; + {{x,_},x} -> true; + {_,_} -> false + end. + +%%% +%%% Register allocation using linear scan. +%%% + +-record(i, + {sort=1 :: instr_number(), + reg=none :: i_reg(), + pool=x :: pool_id(), + var=#b_var{} :: b_var(), + rs=[] :: [range()] + }). + +-record(l, + {cur=#i{} :: interval(), + unhandled_res=[] :: [interval()], + unhandled_any=[] :: [interval()], + active=[] :: [interval()], + inactive=[] :: [interval()], + free=#{} :: #{var_name()=>pool(), + {'next',pool_id()}:=reg_num()}, + regs=[] :: [{b_var(),ssa_register()}] + }). + +-type interval() :: #i{}. +-type i_reg() :: ssa_register() | {'prefer',xreg()} | 'none'. +-type pool_id() :: 'fr' | 'x' | 'z' | instr_number(). +-type pool() :: ordsets:ordset(ssa_register()). + +linear_scan(#st{intervals=Intervals0,res=Res}=St0) -> + St = St0#st{intervals=[],res=[]}, + Free = init_free(maps:to_list(Res)), + Intervals1 = [init_interval(Int, Res) || Int <- Intervals0], + Intervals = sort(Intervals1), + IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end, + {UnhandledRes,Unhandled} = partition(IsReserved, Intervals), + L = #l{unhandled_res=UnhandledRes, + unhandled_any=Unhandled,free=Free}, + #l{regs=Regs} = do_linear(L), + St#st{regs=maps:from_list(Regs)}. + +init_interval({V,[{Start,_}|_]=Rs}, Res) -> + Info = maps:get(V, Res), + Pool = case Info of + {prefer,{x,_}} -> x; + x -> x; + {x,_} -> x; + {y,Uniq} -> Uniq; + {{y,_},Uniq} -> Uniq; + z -> z; + fr -> fr + end, + Reg = case Info of + {prefer,{x,_}} -> Info; + {x,_} -> Info; + {{y,_}=Y,_} -> Y; + _ -> none + end, + #i{sort=Start,var=V,reg=Reg,pool=Pool,rs=Rs}. + +init_free(Res) -> + Free0 = rel2fam([{x,{x,0}}|init_free_1(Res)]), + #{x:=Xs0} = Free1 = maps:from_list(Free0), + Xs = init_xregs(Xs0), + Free = Free1#{x:=Xs}, + Next = maps:fold(fun(K, V, A) -> [{{next,K},length(V)}|A] end, [], Free), + maps:merge(Free, maps:from_list(Next)). + +init_free_1([{_,{prefer,{x,_}=Reg}}|Res]) -> + [{x,Reg}|init_free_1(Res)]; +init_free_1([{_,{x,_}=Reg}|Res]) -> + [{x,Reg}|init_free_1(Res)]; +init_free_1([{_,{y,Uniq}}|Res]) -> + [{Uniq,{y,0}}|init_free_1(Res)]; +init_free_1([{_,{{y,_}=Reg,Uniq}}|Res]) -> + [{Uniq,Reg}|init_free_1(Res)]; +init_free_1([{_,z}|Res]) -> + [{z,{z,0}}|init_free_1(Res)]; +init_free_1([{_,fr}|Res]) -> + [{fr,{fr,0}}|init_free_1(Res)]; +init_free_1([{_,x}|Res]) -> + init_free_1(Res); +init_free_1([]) -> []. + +%% Make sure that the pool of xregs is contiguous. +init_xregs([{x,N},{x,M}|Is]) when N+1 =:= M -> + [{x,N}|init_xregs([{x,M}|Is])]; +init_xregs([{x,N}|[{x,_}|_]=Is]) -> + [{x,N}|init_xregs([{x,N+1}|Is])]; +init_xregs([{x,_}]=Is) -> Is. + +do_linear(L0) -> + case set_next_current(L0) of + done -> + L0; + L1 -> + L2 = expire_active(L1), + L3 = check_inactive(L2), + Available = collect_available(L3), + L4 = select_register(Available, L3), + L = make_cur_active(L4), + do_linear(L) + end. + +set_next_current(#l{unhandled_res=[Cur1|T1], + unhandled_any=[Cur2|T2]}=L) -> + case {Cur1,Cur2} of + {#i{sort=N1},#i{sort=N2}} when N1 < N2 -> + L#l{cur=Cur1,unhandled_res=T1}; + {_,_} -> + L#l{cur=Cur2,unhandled_any=T2} + end; +set_next_current(#l{unhandled_res=[], + unhandled_any=[Cur|T]}=L) -> + L#l{cur=Cur,unhandled_any=T}; +set_next_current(#l{unhandled_res=[Cur|T], + unhandled_any=[]}=L) -> + L#l{cur=Cur,unhandled_res=T}; +set_next_current(#l{unhandled_res=[],unhandled_any=[]}) -> + done. + +expire_active(#l{cur=#i{sort=CurBegin},active=Act0}=L0) -> + {Act,L} = expire_active(Act0, CurBegin, L0, []), + L#l{active=Act}. + +expire_active([#i{reg=Reg,rs=Rs0}=I|Is], CurBegin, L0, Acc) -> + {_,_} = Reg, %Assertion. + case overlap_status(Rs0, CurBegin) of + ends_before_cur -> + L = free_reg(I, L0), + expire_active(Is, CurBegin, L, Acc); + overlapping -> + expire_active(Is, CurBegin, L0, [I|Acc]); + not_overlapping -> + Rs = strip_before_current(Rs0, CurBegin), + L1 = free_reg(I, L0), + L = L1#l{inactive=[I#i{rs=Rs}|L1#l.inactive]}, + expire_active(Is, CurBegin, L, Acc) + end; +expire_active([], _CurBegin, L, Acc) -> + {Acc,L}. + +check_inactive(#l{cur=#i{sort=CurBegin},inactive=InAct0}=L0) -> + {InAct,L} = check_inactive(InAct0, CurBegin, L0, []), + L#l{inactive=InAct}. + +check_inactive([#i{rs=Rs0}=I|Is], CurBegin, L0, Acc) -> + case overlap_status(Rs0, CurBegin) of + ends_before_cur -> + check_inactive(Is, CurBegin, L0, Acc); + not_overlapping -> + check_inactive(Is, CurBegin, L0, [I|Acc]); + overlapping -> + Rs = strip_before_current(Rs0, CurBegin), + L1 = L0#l{active=[I#i{rs=Rs}|L0#l.active]}, + L = reserve_reg(I, L1), + check_inactive(Is, CurBegin, L, Acc) + end; +check_inactive([], _CurBegin, L, Acc) -> + {Acc,L}. + +strip_before_current([{_,E}|Rs], CurBegin) when E =< CurBegin -> + strip_before_current(Rs, CurBegin); +strip_before_current(Rs, _CurBegin) -> Rs. + +collect_available(#l{cur=#i{reg={prefer,{_,_}=Prefer}}=I}=L) -> + %% Use the preferred register if it is available. + Avail = collect_available(L#l{cur=I#i{reg=none}}), + case member(Prefer, Avail) of + true -> [Prefer]; + false -> Avail + end; +collect_available(#l{cur=#i{reg={_,_}=ReservedReg}}) -> + %% Return the already reserved register. + [ReservedReg]; +collect_available(#l{unhandled_res=Unhandled,cur=Cur}=L) -> + Free = get_pool(Cur, L), + + %% Note that since the live intervals are constructed from + %% SSA form, there cannot be any overlap of the current interval + %% with any inactive interval. See [3], page 175. Therefore we + %% only have check the unhandled intervals for overlap with + %% the current interval. As a further optimization, we only need + %% to check the intervals that have reserved registers. + collect_available(Unhandled, Cur, Free). + +collect_available([#i{pool=Pool1}|Is], #i{pool=Pool2}=Cur, Free) + when Pool1 =/= Pool2 -> + %% Wrong pool. Ignore this interval. + collect_available(Is, Cur, Free); +collect_available([#i{reg={_,_}=Reg}=I|Is], Cur, Free0) -> + case overlaps(I, Cur) of + true -> + Free = ordsets:del_element(Reg, Free0), + collect_available(Is, Cur, Free); + false -> + collect_available(Is, Cur, Free0) + end; +collect_available([], _, Free) -> Free. + +select_register([{_,_}=Reg|_], #l{cur=Cur0,regs=Regs}=L) -> + Cur = Cur0#i{reg=Reg}, + reserve_reg(Cur, L#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]}); +select_register([], #l{cur=Cur0,regs=Regs}=L0) -> + %% Allocate a new register in the pool. + {Reg,L1} = get_next_free(Cur0, L0), + Cur = Cur0#i{reg=Reg}, + L = L1#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]}, + reserve_reg(Cur, L). + +make_cur_active(#l{cur=Cur,active=Act}=L) -> + L#l{active=[Cur|Act]}. + +overlaps(#i{rs=Rs1}, #i{rs=Rs2}) -> + are_overlapping(Rs1, Rs2). + +overlap_status([{S,E}], CurBegin) -> + if + E =< CurBegin -> ends_before_cur; + CurBegin < S -> not_overlapping; + true -> overlapping + end; +overlap_status([{S,E}|Rs], CurBegin) -> + if + E =< CurBegin -> + overlap_status(Rs, CurBegin); + S =< CurBegin -> + overlapping; + true -> + not_overlapping + end. + +reserve_reg(#i{reg={_,_}=Reg}=I, L) -> + FreeRegs0 = get_pool(I, L), + FreeRegs = ordsets:del_element(Reg, FreeRegs0), + update_pool(I, FreeRegs, L). + +free_reg(#i{reg={_,_}=Reg}=I, L) -> + FreeRegs0 = get_pool(I, L), + FreeRegs = ordsets:add_element(Reg, FreeRegs0), + update_pool(I, FreeRegs, L). + +get_pool(#i{pool=Pool}, #l{free=Free}) -> + maps:get(Pool, Free). + +update_pool(#i{pool=Pool}, New, #l{free=Free0}=L) -> + Free = maps:put(Pool, New, Free0), + L#l{free=Free}. + +get_next_free(#i{pool=Pool}, #l{free=Free0}=L0) -> + K = {next,Pool}, + N = maps:get(K, Free0), + Free = maps:put(K, N+1, Free0), + L = L0#l{free=Free}, + if + is_integer(Pool) -> {{y,N},L}; + is_atom(Pool) -> {{Pool,N},L} + end. + +%%% +%%% Interval utilities. +%%% + +are_overlapping([R|Rs1], Rs2) -> + case are_overlapping_1(R, Rs2) of + true -> + true; + false -> + are_overlapping(Rs1, Rs2) + end; +are_overlapping([], _) -> false. + +are_overlapping_1({_S1,E1}, [{S2,_E2}|_]) when E1 < S2 -> + false; +are_overlapping_1({S1,E1}=R, [{S2,E2}|Rs]) -> + (S2 < E1 andalso E2 > S1) orelse are_overlapping_1(R, Rs); +are_overlapping_1({_,_}, []) -> false. + +%%% +%%% Utilities. +%%% + +%% is_loop_header(L, Blocks) -> false|true. +%% Check whether the block is a loop header. + +is_loop_header(L, Blocks) -> + %% We KNOW that a loop header must start with a peek_message + %% instruction. + case maps:get(L, Blocks) of + #b_blk{is=[#b_set{op=peek_message}|_]} -> true; + _ -> false + end. + +rel2fam(S0) -> + S1 = sofs:relation(S0), + S = sofs:rel2fam(S1), + sofs:to_external(S). + +split_phis(Is) -> + partition(fun(#b_set{op=Op}) -> Op =:= phi end, Is). + +is_yreg({y,_}) -> true; +is_yreg({x,_}) -> false; +is_yreg({z,_}) -> false; +is_yreg({fr,_}) -> false. + +new_var_names([V0|Vs0], Count0) -> + {V,Count1} = new_var_name(V0, Count0), + {Vs,Count} = new_var_names(Vs0, Count1), + {[V|Vs],Count}; +new_var_names([], Count) -> {[],Count}. + +new_var_name({Base,Int}, Count) -> + true = is_integer(Int), %Assertion. + {{Base,Count},Count+1}; +new_var_name(Base, Count) -> + {{Base,Count},Count+1}. diff --git a/lib/compiler/src/beam_ssa_recv.erl b/lib/compiler/src/beam_ssa_recv.erl new file mode 100644 index 0000000000..82fe006487 --- /dev/null +++ b/lib/compiler/src/beam_ssa_recv.erl @@ -0,0 +1,281 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(beam_ssa_recv). +-export([module/2]). + +%%% +%%% In code such as: +%%% +%%% Ref = make_ref(), %Or erlang:monitor(process, Pid) +%%% . +%%% . +%%% . +%%% receive +%%% {Ref,Reply} -> Reply +%%% end. +%%% +%%% we know that none of the messages that exist in the message queue +%%% before the call to make_ref/0 can be matched out in the receive +%%% statement. Therefore we can avoid going through the entire message +%%% queue if we introduce two new instructions (here written as +%%% BIFs in pseudo-Erlang): +%%% +%%% recv_mark(SomeUniqInteger), +%%% Ref = make_ref(), +%%% . +%%% . +%%% . +%%% recv_set(SomeUniqInteger), +%%% receive +%%% {Ref,Reply} -> Reply +%%% end. +%%% +%%% The recv_mark/1 instruction will save the current position and +%%% SomeUniqInteger in the process context. The recv_set +%%% instruction will verify that SomeUniqInteger is still stored +%%% in the process context. If it is, it will set the current pointer +%%% for the message queue (the next message to be read out) to the +%%% position that was saved by recv_mark/1. +%%% +%%% The remove_message instruction must be modified to invalidate +%%% the information stored by the previous recv_mark/1, in case there +%%% is another receive executed between the calls to recv_mark/1 and +%%% recv_set/1. +%%% +%%% We use a reference to a label (i.e. a position in the loaded code) +%%% as the SomeUniqInteger. +%%% + +-include("beam_ssa.hrl"). +-import(lists, [all/2,reverse/2]). + +-spec module(beam_ssa:b_module(), [compile:option()]) -> + {'ok',beam_ssa:b_module()}. + +module(#b_module{body=Fs0}=Module, _Opts) -> + Fs = [function(F) || F <- Fs0], + {ok,Module#b_module{body=Fs}}. + +%%% +%%% Local functions. +%%% + +function(#b_function{anno=Anno,bs=Blocks0}=F) -> + try + Blocks = opt(Blocks0), + F#b_function{bs=Blocks} + catch + Class:Error:Stack -> + #{func_info:={_,Name,Arity}} = Anno, + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +opt(Blocks) -> + Linear = beam_ssa:linearize(Blocks), + opt(Linear, Blocks, []). + +opt([{L,#b_blk{is=[#b_set{op=peek_message}|_]}=Blk0}|Bs], Blocks0, Preds) -> + %% Search for a suitable reference creating call in one of the predecessor + %% blocks. Whether we find such a call or not, we always clear the + %% the list of predecessors to ensure that any nested receive can't + %% search above the current receive. + case recv_opt(Preds, L, Blocks0) of + {yes,Blocks1} -> + Blk = beam_ssa:add_anno(recv_set, L, Blk0), + Blocks = maps:put(L, Blk, Blocks1), + opt(Bs, Blocks, []); + no -> + opt(Bs, Blocks0, []) + end; +opt([{L,_}|Bs], Blocks, Preds) -> + opt(Bs, Blocks, [L|Preds]); +opt([], Blocks, _) -> Blocks. + +recv_opt([L|Ls], RecvLbl, Blocks) -> + #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks), + case recv_opt_is(Is0, RecvLbl, Blocks, []) of + {yes,Is} -> + Blk = Blk0#b_blk{is=Is}, + {yes,maps:put(L, Blk, Blocks)}; + no -> + recv_opt(Ls, RecvLbl, Blocks) + end; +recv_opt([], _, _Blocks) -> no. + +recv_opt_is([#b_set{op=call}=I0|Is], RecvLbl, Blocks0, Acc) -> + case makes_ref(I0, Blocks0) of + no -> + recv_opt_is(Is, RecvLbl, Blocks0, [I0|Acc]); + {yes,Ref} -> + case opt_ref_used(RecvLbl, Ref, Blocks0) of + false -> + recv_opt_is(Is, RecvLbl, Blocks0, [I0|Acc]); + true -> + I = beam_ssa:add_anno(recv_mark, RecvLbl, I0), + {yes,reverse(Acc, [I|Is])} + end + end; +recv_opt_is([I|Is], RecvLbl, Blocks, Acc) -> + recv_opt_is(Is, RecvLbl, Blocks, [I|Acc]); +recv_opt_is([], _, _, _) -> no. + +makes_ref(#b_set{dst=#b_var{name=Dst},args=[Func0|_]}, Blocks) -> + Func = case Func0 of + #b_remote{mod=#b_literal{val=erlang}, + name=#b_literal{val=Name},arity=A0} -> + {Name,A0}; + _ -> + none + end, + case Func of + {make_ref,0} -> + {yes,Dst}; + {monitor,2} -> + {yes,Dst}; + {spawn_monitor,A} when A =:= 1; A =:= 3 -> + ref_in_tuple(Dst, Blocks); + _ -> + no + end. + +ref_in_tuple(Tuple, Blocks) -> + F = fun(#b_set{op=get_tuple_element,dst=#b_var{name=Ref}, + args=[#b_var{name=Tup},#b_literal{val=1}]}, no) + when Tup =:= Tuple -> {yes,Ref}; + (_, A) -> A + end, + beam_ssa:fold_instrs_rpo(F, [0], no, Blocks). + +opt_ref_used(RecvLbl, Ref, Blocks) -> + Vs = #{{var,Ref}=>ref,ref=>Ref,ref_matched=>false}, + case opt_ref_used_1(RecvLbl, Vs, Blocks) of + used -> true; + not_used -> false; + done -> false + end. + +opt_ref_used_1(L, Vs0, Blocks) -> + #b_blk{is=Is} = Blk = maps:get(L, Blocks), + case opt_ref_used_is(Is, Vs0) of + #{}=Vs -> + opt_ref_used_last(Blk, Vs, Blocks); + Result -> + Result + end. + +opt_ref_used_is([#b_set{op=peek_message,dst=#b_var{name=M}}|Is], Vs0) -> + Vs = Vs0#{{var,M}=>message}, + opt_ref_used_is(Is, Vs); +opt_ref_used_is([#b_set{op={bif,Bif},args=Args,dst=#b_var{name=B}}=I|Is], + Vs0) -> + S = case Bif of + '=:=' -> true; + '==' -> true; + _ -> none + end, + case S of + none -> + Vs = update_vars(I, Vs0), + opt_ref_used_is(Is, Vs); + Bool when is_boolean(Bool) -> + case is_ref_msg_comparison(Args, Vs0) of + true -> + Vs = Vs0#{B=>{is_ref,Bool}}, + opt_ref_used_is(Is, Vs); + false -> + opt_ref_used_is(Is, Vs0) + end + end; +opt_ref_used_is([#b_set{op=remove_message}|_], Vs) -> + case Vs of + #{ref_matched:=true} -> + used; + #{ref_matched:=false} -> + not_used + end; +opt_ref_used_is([#b_set{op=recv_next}|_], _Vs) -> + done; +opt_ref_used_is([#b_set{op=wait_timeout}|_], _Vs) -> + done; +opt_ref_used_is([#b_set{op=wait}|_], _Vs) -> + done; +opt_ref_used_is([#b_set{}=I|Is], Vs0) -> + Vs = update_vars(I, Vs0), + opt_ref_used_is(Is, Vs); +opt_ref_used_is([], Vs) -> Vs. + +opt_ref_used_last(#b_blk{last=Last}=Blk, Vs, Blocks) -> + case Last of + #b_br{bool=#b_var{name=Bool},succ=Succ,fail=Fail} -> + case Vs of + #{Bool:={is_ref,Matched}} -> + ref_used_in([{Succ,Vs#{ref_matched:=Matched}}, + {Fail,Vs#{ref_matched:=not Matched}}], + Blocks); + #{} -> + ref_used_in([{Succ,Vs},{Fail,Vs}], Blocks) + end; + _ -> + SuccVs = [{Succ,Vs} || Succ <- beam_ssa:successors(Blk)], + ref_used_in(SuccVs, Blocks) + end. + +ref_used_in([{L,Vs0}|Ls], Blocks) -> + case opt_ref_used_1(L, Vs0, Blocks) of + not_used -> + not_used; + used -> + case ref_used_in(Ls, Blocks) of + done -> used; + Result -> Result + end; + done -> ref_used_in(Ls, Blocks) + end; +ref_used_in([], _) -> done. + +update_vars(#b_set{args=Args,dst=#b_var{name=B}}, Vs) -> + Vars = [V || #b_var{name=V} <- Args], + All = all(fun(V) -> + Var = {var,V}, + case Vs of + #{Var:=message} -> true; + #{} -> false + end + end, Vars), + case All of + true -> Vs#{{var,B}=>message}; + false -> Vs + end. + +%% is_ref_msg_comparison(Args, Variables) -> true|false. +%% Return 'true' if Args denotes a comparison between the +%% reference and message or part of the message. + +is_ref_msg_comparison([#b_var{name=A1},#b_var{name=A2}], Vs) -> + V1 = {var,A1}, + V2 = {var,A2}, + case Vs of + #{V1:=ref,V2:=message} -> true; + #{V1:=message,V2:=ref} -> true; + #{} -> false + end; +is_ref_msg_comparison(_, _) -> false. diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl new file mode 100644 index 0000000000..e5f15da836 --- /dev/null +++ b/lib/compiler/src/beam_ssa_type.erl @@ -0,0 +1,1106 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(beam_ssa_type). +-export([opt/2]). + +-include("beam_ssa.hrl"). +-import(lists, [any/2,droplast/1,foldl/3,last/1,member/2, + reverse/1,search/2,sort/1]). + +-define(UNICODE_INT, #t_integer{elements={0,16#10FFFF}}). + +-record(d, {ds :: #{beam_ssa:var_name():=beam_ssa:b_set()}, + ls :: #{beam_ssa:label():=type_db()}}). + +-define(ATOM_SET_SIZE, 5). + +%% Records that represent type information. +-record(t_atom, {elements=any :: 'any' | [atom()]}). +-record(t_integer, {elements=any :: 'any' | {integer(),integer()}}). +-record(t_bs_match, {type :: type()}). +-record(t_tuple, {size=0 :: integer(), + exact=false :: boolean(), + elements=[] :: [any()] + }). + +-type type() :: 'any' | 'none' | + #t_atom{} | #t_integer{} | #t_bs_match{} | #t_tuple{} | + {'binary',pos_integer()} | 'cons' | 'float' | 'list' | 'map' | 'nil' |'number'. +-type type_db() :: #{beam_ssa:var_name():=type()}. + +-spec opt([{Label0,Block0}], Args) -> [{Label,Block}] when + Label0 :: beam_ssa:label(), + Block0 :: beam_ssa:b_blk(), + Args :: [beam_ssa:b_var()], + Label :: beam_ssa:label(), + Block :: beam_ssa:b_blk(). + +opt(Linear, Args) -> + Ts = maps:from_list([{V,any} || #b_var{name=V} <- Args]), + FakeCall = #b_set{op=call,args=[#b_remote{mod=#b_literal{val=unknown}, + name=#b_literal{val=unknown}, + arity=0}]}, + Defs = maps:from_list([{V,FakeCall#b_set{dst=Var}} || + #b_var{name=V}=Var <- Args]), + D = #d{ds=Defs,ls=#{0=>Ts}}, + opt_1(Linear, D). + +opt_1([{L,Blk}|Bs], #d{ls=Ls}=D) -> + case Ls of + #{L:=Ts} -> + opt_2(L, Blk, Bs, Ts, D); + #{} -> + %% This block is never reached. Discard it. + opt_1(Bs, D) + end; +opt_1([], _) -> []. + +opt_2(L, #b_blk{is=Is0}=Blk0, Bs, Ts, D0) -> + case Is0 of + [#b_set{op=call,dst=Dst, + args=[#b_remote{mod=#b_literal{val=Mod}, + name=#b_literal{val=Name}}=Rem|Args0]}=I0] -> + case erl_bifs:is_exit_bif(Mod, Name, length(Args0)) of + true -> + %% This call will never reach the successor block. + %% Rewrite the terminator to a 'ret', and remove + %% all type information for this label. That will + %% simplify the phi node in the former successor. + Args = [simplify_arg(Arg, Ts) || Arg <- Args0], + I = I0#b_set{args=[Rem|Args]}, + Ret = #b_ret{arg=Dst}, + Blk = Blk0#b_blk{is=[I],last=Ret}, + Ls = maps:remove(L, D0#d.ls), + D = D0#d{ls=Ls}, + [{L,Blk}|opt_1(Bs, D)]; + false -> + opt_3(L, Blk0, Bs, Ts, D0) + end; + _ -> + opt_3(L, Blk0, Bs, Ts, D0) + end. + +opt_3(L, #b_blk{is=Is0,last=Last0}=Blk0, Bs, Ts0, #d{ds=Ds0,ls=Ls0}=D0) -> + {Is,Ts,Ds} = opt_is(Is0, Ts0, Ds0, Ls0, []), + D1 = D0#d{ds=Ds}, + Last = opt_terminator(Last0, Ts, Ds), + D = update_successors(Last, Ts, D1), + Blk = Blk0#b_blk{is=Is,last=Last}, + [{L,Blk}|opt_1(Bs, D)]. + +opt_is([#b_set{op=phi,dst=#b_var{name=Dst},args=Args0}=I0|Is], Ts0, Ds0, Ls, Acc) -> + %% Simplify the phi node by removing all predecessor blocks that no + %% longer exists or no longer branches to this block. + Args = [P || {_,From}=P <- Args0, maps:is_key(From, Ls)], + I = I0#b_set{args=Args}, + Ts = update_types(I, Ts0, Ds0), + Ds = Ds0#{Dst=>I}, + opt_is(Is, Ts, Ds, Ls, [I|Acc]); +opt_is([#b_set{dst=#b_var{name=Dst}}=I0|Is], Ts0, Ds0, Ls, Acc) -> + I = simplify(I0, Ts0), + Ts = update_types(I, Ts0, Ds0), + Ds = Ds0#{Dst=>I}, + opt_is(Is, Ts, Ds, Ls, [I|Acc]); +opt_is([], Ts, Ds, _Ls, Acc) -> + {reverse(Acc),Ts,Ds}. + +simplify(#b_set{op={bif,element},args=[#b_literal{val=Index},Tuple]}=I, Ts) -> + case t_tuple_size(get_type(Tuple, Ts)) of + {_,Size} when is_integer(Index), 1 =< Index, Index =< Size -> + I#b_set{op=get_tuple_element,args=[Tuple,#b_literal{val=Index-1}]}; + _ -> + I + end; +simplify(#b_set{op={bif,hd},args=[List]}=I, Ts) -> + case get_type(List, Ts) of + cons -> + I#b_set{op=get_hd}; + _ -> + I + end; +simplify(#b_set{op={bif,tl},args=[List]}=I, Ts) -> + case get_type(List, Ts) of + cons -> + I#b_set{op=get_tl}; + _ -> + I + end; +simplify(#b_set{op={bif,size},args=[Term]}=I, Ts) -> + case get_type(Term, Ts) of + #t_tuple{} -> + I#b_set{op={bif,tuple_size}}; + _ -> + I + end; +simplify(#b_set{op={bif,'=='},args=Args0}=I0, Ts) -> + Args = [simplify_arg(Arg, Ts) || Arg <- Args0], + I = I0#b_set{args=Args}, + Types = get_types(Args, Ts), + EqEq = case {meet(Types),join(Types)} of + {none,any} -> true; + {#t_integer{},#t_integer{}} -> true; + {float,float} -> true; + {{binary,_},_} -> true; + {#t_atom{},_} -> true; + {_,_} -> false + end, + case EqEq of + true -> + I#b_set{op={bif,'=:='}}; + false -> + I + end; +simplify(#b_set{op={bif,Op},args=Args0}=I0, Ts) -> + Args = [simplify_arg(Arg, Ts) || Arg <- Args0], + I = I0#b_set{args=Args}, + Types = get_types(Args, Ts), + case is_float_op(Op, Types) of + false -> + I; + true -> + AnnoArgs = [anno_float_arg(A) || A <- Types], + beam_ssa:add_anno(float_op, AnnoArgs, I) + end; +simplify(#b_set{op=wait_timeout,args=[Timeout0]}=I, Ts) -> + case simplify_arg(Timeout0, Ts) of + #b_literal{val=infinity} -> + I#b_set{op=wait,args=[]}; + Timeout -> + I#b_set{args=[Timeout]} + end; +simplify(#b_set{op=Op,args=Args0}=I, Ts) -> + Safe = case Op of + call -> true; + put_list -> true; + put_tuple -> true; + _ -> false + end, + case Safe of + true -> + Args = [simplify_arg(Arg, Ts) || Arg <- Args0], + I#b_set{args=Args}; + false -> + I + end. + +simplify_arg(#b_var{}=Arg, Ts) -> + Type = get_type(Arg, Ts), + case get_literal_from_type(Type) of + none -> Arg; + #b_literal{}=Lit -> Lit + end; +simplify_arg(Arg, _Ts) -> Arg. + +is_float_op('-', [float]) -> + true; +is_float_op('/', [_,_]) -> + true; +is_float_op(Op, [float,_Other]) -> + is_float_op_1(Op); +is_float_op(Op, [_Other,float]) -> + is_float_op_1(Op); +is_float_op(_, _) -> false. + +is_float_op_1('+') -> true; +is_float_op_1('-') -> true; +is_float_op_1('*') -> true; +is_float_op_1(_) -> false. + +anno_float_arg(float) -> float; +anno_float_arg(_) -> convert. + +opt_terminator(#b_br{bool=#b_literal{}}=Br, _Ts, _Ds) -> + Br; +opt_terminator(#b_br{bool=#b_var{name=V}=Var}=Br, Ts, Ds) -> + BoolType = get_type(Var, Ts), + case get_literal_from_type(BoolType) of + #b_literal{}=BoolLit -> + Br#b_br{bool=BoolLit}; + none -> + #{V:=Set} = Ds, + case Set of + #b_set{op={bif,'=:='},args=[Bool,#b_literal{val=true}]} -> + case t_is_boolean(get_type(Bool, Ts)) of + true -> + %% Bool =:= true ==> Bool + simplify_not(Br#b_br{bool=Bool}, Ts, Ds); + false -> + Br + end; + #b_set{} -> + simplify_not(Br, Ts, Ds) + end + end; +opt_terminator(#b_switch{arg=#b_literal{val=Val0}=Arg,fail=Fail,list=List}, + _Ts, _Ds) -> + {value,{_,L}} = search(fun({#b_literal{val=Val1},_}) -> + Val1 =:= Val0 + end, List ++ [{Arg,Fail}]), + #b_br{bool=#b_literal{val=true},succ=L,fail=L}; +opt_terminator(#b_switch{arg=V}=Sw0, Ts, Ds) -> + case get_literal_from_type(Ts) of + #b_literal{}=Lit -> + Sw = Sw0#b_switch{arg=Lit}, + opt_terminator(Sw, Ts, Ds); + none -> + case get_type(V, Ts) of + #t_integer{elements={_,_}=Range} -> + simplify_switch_int(Sw0, Range); + Type -> + case t_is_boolean(Type) of + true -> + case simplify_switch_bool(Sw0, Ts, Ds) of + #b_br{}=Br -> + opt_terminator(Br, Ts, Ds); + Sw -> + Sw + end; + false -> + Sw0 + end + end + end; +opt_terminator(#b_ret{}=Ret, _Ts, _Ds) -> + Ret. + +update_successors(#b_br{bool=#b_literal{val=false},fail=S}, Ts, D) -> + update_successor(S, Ts, D); +update_successors(#b_br{bool=#b_literal{val=true},succ=S}, Ts, D) -> + update_successor(S, Ts, D); +update_successors(#b_br{bool=#b_var{name=V},succ=Succ,fail=Fail}, Ts, D0) -> + D = update_successor(Fail, Ts#{V:=t_atom(false)}, D0), + SuccTs = infer_types(V, Ts, D0), + update_successor(Succ, SuccTs#{V:=t_atom(true)}, D); +update_successors(#b_switch{arg=#b_var{name=V},fail=Fail,list=List}, Ts, D0) -> + D = update_successor(Fail, Ts, D0), + foldl(fun({Val,S}, A) -> + T = get_type(Val, Ts), + update_successor(S, Ts#{V=>T}, A) + end, D, List); +update_successors(#b_ret{}, _Ts, D) -> D. + +update_successor(S, Ts0, #d{ls=Ls}=D) -> + case Ls of + #{S:=Ts1} -> + Ts = join_types(Ts0, Ts1), + D#d{ls=Ls#{S:=Ts}}; + #{} -> + D#d{ls=Ls#{S=>Ts0}} + end. + +update_types(#b_set{op=Op,dst=#b_var{name=Dst},args=Args}, Ts, Ds) -> + T = type(Op, Args, Ts, Ds), + Ts#{Dst=>T}. + +type(phi, Args, Ts, _Ds) -> + Types = [get_type(A, Ts) || {A,_} <- Args], + join(Types); +type({bif,'=:='}, [Same,Same], _Ts, _Ds) -> + t_atom(true); +type({bif,'=:='}, [_,_]=Args, Ts, _Ds) -> + case get_literals(Args, Ts) of + [#b_literal{val=Lit1},#b_literal{val=Lit2}] -> + t_atom(Lit1 =:= Lit2); + [_,_] -> + case meet(get_types(Args, Ts)) of + none -> t_atom(false); + _ -> t_boolean() + end + end; +type({bif,tuple_size}, [Src], Ts, _Ds) -> + case t_tuple_size(get_type(Src, Ts)) of + {exact,Size} -> + t_integer(Size); + _ -> + t_integer() + end; +type({bif,'band'}, Args, Ts, _Ds) -> + band_type(Args, Ts); +type({bif,Bif}, [Src]=Args, Ts, _Ds) -> + case get_type(Src, Ts) of + any -> + bif_type(Bif, Args); + Type -> + case will_succeed(Bif, Type) of + yes -> + t_atom(true); + no -> + t_atom(false); + maybe -> + bif_type(Bif, Args) + end + end; +type({bif,Bif}, Args, Ts, _Ds) -> + case bif_type(Bif, Args) of + number -> + arith_op_type(Args, Ts); + Type -> + Type + end; +type(bs_init, [#b_literal{val=Type}|Args], _Ts, _Ds) -> + case {Type,Args} of + {new,[_,#b_literal{val=Unit}]} -> + {binary,Unit}; + {append,[_,_,#b_literal{val=Unit}]} -> + {binary,Unit}; + {private_append,[_,_,#b_literal{val=Unit}]} -> + {binary,Unit} + end; +type(bs_extract, [Ctx], Ts, _Ds) -> + #t_bs_match{type=Type} = get_type(Ctx, Ts), + Type; +type(bs_match, Args, _Ts, _Ds) -> + #t_bs_match{type=bs_match_type(Args)}; +type(call, [#b_remote{mod=#b_literal{val=Mod}, + name=#b_literal{val=Name}}|Args], Ts, _Ds) -> + case {Mod,Name,Args} of + {erlang,setelement,[Pos,Tuple,_]} -> + case {get_type(Pos, Ts),get_type(Tuple, Ts)} of + {#t_integer{elements={MinIndex,_}},#t_tuple{}=T} + when MinIndex > 1 -> + %% First element is not updated. The result + %% will have the same type. + T; + {_,#t_tuple{}=T} -> + %% Position is 1 or unknown. May update the first + %% element of the tuple. + T#t_tuple{elements=[]}; + {#t_integer{elements={MinIndex,_}},_} -> + #t_tuple{size=MinIndex}; + {_,_} -> + #t_tuple{} + end; + {math,_,_} -> + case is_math_bif(Name, length(Args)) of + false -> any; + true -> float + end; + {_,_,_} -> + case erl_bifs:is_exit_bif(Mod, Name, length(Args)) of + true -> none; + false -> any + end + end; +type(get_tuple_element, [Tuple,#b_literal{val=0}], Ts, _Ds) -> + case get_type(Tuple, Ts) of + #t_tuple{elements=[First]} -> + get_type(#b_literal{val=First}, Ts); + #t_tuple{} -> + any + end; +type(is_nonempty_list, [Src], Ts, _Ds) -> + case get_type(Src, Ts) of + any -> + t_boolean(); + list -> + t_boolean(); + cons -> + t_atom(true); + _ -> + t_atom(false) + end; +type(is_tagged_tuple, [Src,#b_literal{val=Size},#b_literal{val=Tag}], Ts, _Ds) -> + case get_type(Src, Ts) of + #t_tuple{exact=true,size=Size,elements=[Tag]} -> + t_atom(true); + #t_tuple{exact=true,size=ActualSize,elements=[]} -> + if + Size =/= ActualSize -> + t_atom(false); + true -> + t_boolean() + end; + #t_tuple{exact=false} -> + t_boolean(); + any -> + t_boolean(); + _ -> + t_atom(false) + end; +type(put_list, _Args, _Ts, _Ds) -> + cons; +type(put_tuple, Args, _Ts, _Ds) -> + case Args of + [#b_literal{val=First}|_] -> + #t_tuple{exact=true,size=length(Args),elements=[First]}; + _ -> + #t_tuple{exact=true,size=length(Args)} + end; +type(succeeded, [#b_var{name=Src}], Ts, Ds) -> + case maps:get(Src, Ds) of + #b_set{op={bif,Bif},args=BifArgs} -> + Types = get_types(BifArgs, Ts), + case {Bif,Types} of + {byte_size,[{binary,_}]} -> + t_atom(true); + {bit_size,[{binary,_}]} -> + t_atom(true); + {map_size,[map]} -> + t_atom(true); + {'not',[Type]} -> + case t_is_boolean(Type) of + true -> t_atom(true); + false -> t_boolean() + end; + {size,[{binary,_}]} -> + t_atom(true); + {tuple_size,[#t_tuple{}]} -> + t_atom(true); + {_,_} -> + t_boolean() + end; + #b_set{op=get_hd} -> + t_atom(true); + #b_set{op=get_tl} -> + t_atom(true); + #b_set{op=get_tuple_element} -> + t_atom(true); + #b_set{op=wait} -> + t_atom(false); + #b_set{} -> + t_boolean() + end; +type(_, _, _, _) -> any. + +arith_op_type(Args, Ts) -> + Types = get_types(Args, Ts), + foldl(fun(#t_integer{}, unknown) -> t_integer(); + (#t_integer{}, number) -> number; + (#t_integer{}, float) -> float; + (#t_integer{}, #t_integer{}) -> t_integer(); + (float, unknown) -> float; + (float, #t_integer{}) -> float; + (float, number) -> float; + (number, unknown) -> number; + (number, #t_integer{}) -> number; + (number, float) -> float; + (any, _) -> number; + (_, any) -> number; + (Same, Same) -> Same; + (_, _) -> none + end, unknown, Types). + +%% will_succeed(TestOperation, Type) -> yes|no|maybe. +%% Test whether TestOperation applied to an argument of type Type +%% will succeed. Return yes, no, or maybe. +%% +%% Type is a type as described in the comment for verified_type/1 at +%% the very end of this file, but it will *never* be 'any'. + +will_succeed(is_atom, Type) -> + case Type of + #t_atom{} -> yes; + _ -> no + end; +will_succeed(is_binary, Type) -> + case Type of + {binary,U} when U rem 8 =:= 0 -> yes; + {binary,_} -> maybe; + _ -> no + end; +will_succeed(is_bitstring, Type) -> + case Type of + {binary,_} -> yes; + _ -> no + end; +will_succeed(is_boolean, Type) -> + case Type of + #t_atom{elements=any} -> + maybe; + #t_atom{elements=Es} -> + case t_is_boolean(Type) of + true -> + yes; + false -> + case any(fun is_boolean/1, Es) of + true -> maybe; + false -> no + end + end; + _ -> + no + end; +will_succeed(is_float, Type) -> + case Type of + float -> yes; + number -> maybe; + _ -> no + end; +will_succeed(is_integer, Type) -> + case Type of + #t_integer{} -> yes; + number -> maybe; + _ -> no + end; +will_succeed(is_list, Type) -> + case Type of + list -> yes; + cons -> yes; + nil -> yes; + _ -> no + end; +will_succeed(is_map, Type) -> + case Type of + map -> yes; + _ -> no + end; +will_succeed(is_number, Type) -> + case Type of + float -> yes; + #t_integer{} -> yes; + number -> yes; + _ -> no + end; +will_succeed(is_tuple, Type) -> + case Type of + #t_tuple{} -> yes; + _ -> no + end; +will_succeed(_, _) -> maybe. + + +band_type([#b_literal{val=Int},Other], Ts) when is_integer(Int) -> + band_type_1(Int, Other, Ts); +band_type([Other,#b_literal{val=Int}], Ts) when is_integer(Int) -> + band_type_1(Int, Other, Ts); +band_type([_,_], _) -> t_integer(). + +band_type_1(Int, OtherSrc, Ts) -> + Type = band_type_2(Int, 0), + OtherType = get_type(OtherSrc, Ts), + meet(Type, OtherType). + +band_type_2(N, Bits) when Bits < 64 -> + case 1 bsl Bits of + P when P =:= N + 1 -> + t_integer(0, N); + P when P > N + 1 -> + t_integer(); + _ -> + band_type_2(N, Bits+1) + end; +band_type_2(_, _) -> + %% Negative or large positive number. Give up. + t_integer(). + +bs_match_type([#b_literal{val=Type}|Args]) -> + bs_match_type(Type, Args). + +bs_match_type(binary, Args) -> + [_,_,_,#b_literal{val=U}] = Args, + {binary,U}; +bs_match_type(float, _) -> + float; +bs_match_type(integer, Args) -> + case Args of + [_, + #b_literal{val=Flags}, + #b_literal{val=Size}, + #b_literal{val=Unit}] when Size * Unit < 64 -> + NumBits = Size * Unit, + case member(unsigned, Flags) of + true -> + t_integer(0, (1 bsl NumBits)-1); + false -> + %% Signed integer. Don't bother. + t_integer() + end; + [_|_] -> + t_integer() + end; +bs_match_type(skip, _) -> + any; +bs_match_type(string, _) -> + any; +bs_match_type(utf8, _) -> + ?UNICODE_INT; +bs_match_type(utf16, _) -> + ?UNICODE_INT; +bs_match_type(utf32, _) -> + ?UNICODE_INT. + +simplify_switch_int(#b_switch{list=List0}=Sw, {Min,Max}) -> + List1 = sort(List0), + Vs = [V || {#b_literal{val=V},_} <- List1], + case eq_ranges(Vs, Min, Max) of + true -> + {_,LastL} = last(List1), + List = droplast(List1), + Sw#b_switch{fail=LastL,list=List}; + false -> + Sw + end. + +eq_ranges([H], H, H) -> true; +eq_ranges([H|T], H, Max) -> eq_ranges(T, H+1, Max); +eq_ranges(_, _, _) -> false. + +simplify_switch_bool(#b_switch{arg=B,list=List0}=Sw, Ts, Ds) -> + List = sort(List0), + case List of + [{#b_literal{val=false},Fail},{#b_literal{val=true},Succ}] -> + simplify_not(#b_br{bool=B,succ=Succ,fail=Fail}, Ts, Ds); + [_|_] -> + Sw + end. + +simplify_not(#b_br{bool=#b_var{name=V},succ=Succ,fail=Fail}=Br, Ts, Ds) -> + case Ds of + #{V:=#b_set{op={bif,'not'},args=[Bool]}} -> + case t_is_boolean(get_type(Bool, Ts)) of + true -> + Br#b_br{bool=Bool,succ=Fail,fail=Succ}; + false -> + Br + end; + #{} -> + Br + end. + +get_literals(Values, Ts) -> + [get_literal_from_type(get_type(Val, Ts)) || Val <- Values]. + +get_types(Values, Ts) -> + [get_type(Val, Ts) || Val <- Values]. + +-spec get_type(beam_ssa:value(), type_db()) -> type(). + +get_type(#b_var{name=V}, Ts) -> + #{V:=T} = Ts, + T; +get_type(#b_literal{val=Val}, _Ts) -> + if + is_atom(Val) -> + t_atom(Val); + is_float(Val) -> + float; + is_integer(Val) -> + t_integer(Val); + is_list(Val), Val =/= [] -> + cons; + is_map(Val) -> + map; + Val =:= {} -> + #t_tuple{exact=true}; + is_tuple(Val) -> + #t_tuple{exact=true,size=tuple_size(Val), + elements=[element(1, Val)]}; + Val =:= [] -> + nil; + true -> + any + end. + +infer_types(V, Ts, #d{ds=Ds}) -> + #{V:=#b_set{op=Op,args=Args}} = Ds, + Types = infer_type(Op, Args, Ds), + meet_types(Types, Ts). + +infer_type({bif,element}, [#b_literal{val=Pos},#b_var{name=Tuple}], _Ds) -> + if + is_integer(Pos), 1 =< Pos -> + [{Tuple,#t_tuple{size=Pos}}]; + true -> + [] + end; +infer_type({bif,'=:='}, [#b_var{name=Src},#b_literal{}=Lit], Ds) -> + Def = maps:get(Src, Ds), + Type = get_type(Lit, #{}), + [{Src,Type}|infer_tuple_size(Def, Lit) ++ + infer_first_element(Def, Lit)]; +infer_type({bif,Bif}, [#b_var{name=Src}]=Args, _Ds) -> + case inferred_bif_type(Bif, Args) of + any -> []; + T -> [{Src,T}] + end; +infer_type({bif,is_map_key}, [_,#b_var{name=Src}], _Ds) -> + [{Src,map}]; +infer_type({bif,map_get}, [_,#b_var{name=Src}], _Ds) -> + [{Src,map}]; +infer_type(bs_start_match, [#b_var{name=Bin}], _Ds) -> + [{Bin,{binary,1}}]; +infer_type(is_nonempty_list, [#b_var{name=Src}], _Ds) -> + [{Src,cons}]; +infer_type(is_tagged_tuple, [#b_var{name=Src},#b_literal{val=Size}, + #b_literal{val=Tag}], _Ds) -> + [{Src,#t_tuple{exact=true,size=Size,elements=[Tag]}}]; +infer_type(succeeded, [#b_var{name=Src}], Ds) -> + #b_set{op=Op,args=Args} = maps:get(Src, Ds), + infer_type(Op, Args, Ds); +infer_type(_Op, _Args, _Ds) -> + []. + +%% bif_type(Name, Args) -> Type +%% Return the return type for the guard BIF or operator Name with +%% arguments Args. +%% +%% Note that that the following BIFs are handle elsewhere: +%% +%% band/2 +%% tuple_size/1 + +bif_type(abs, [_]) -> number; +bif_type(bit_size, [_]) -> t_integer(); +bif_type(byte_size, [_]) -> t_integer(); +bif_type(ceil, [_]) -> t_integer(); +bif_type(float, [_]) -> float; +bif_type(floor, [_]) -> t_integer(); +bif_type(is_map_key, [_,_]) -> t_boolean(); +bif_type(length, [_]) -> t_integer(); +bif_type(map_size, [_]) -> t_integer(); +bif_type(round, [_]) -> t_integer(); +bif_type(size, [_]) -> t_integer(); +bif_type(trunc, [_]) -> t_integer(); +bif_type('bnot', [_]) -> t_integer(); +bif_type('bor', [_,_]) -> t_integer(); +bif_type('bsl', [_,_]) -> t_integer(); +bif_type('bsr', [_,_]) -> t_integer(); +bif_type('bxor', [_,_]) -> t_integer(); +bif_type('div', [_,_]) -> t_integer(); +bif_type('rem', [_,_]) -> t_integer(); +bif_type('/', [_,_]) -> float; +bif_type(Name, Args) -> + Arity = length(Args), + case erl_internal:new_type_test(Name, Arity) orelse + erl_internal:bool_op(Name, Arity) orelse + erl_internal:comp_op(Name, Arity) of + true -> + t_boolean(); + false -> + case erl_internal:arith_op(Name, Arity) of + true -> number; + false -> any + end + end. + +inferred_bif_type(is_atom, [_]) -> t_atom(); +inferred_bif_type(is_binary, [_]) -> {binary,8}; +inferred_bif_type(is_bitstring, [_]) -> {binary,1}; +inferred_bif_type(is_boolean, [_]) -> t_boolean(); +inferred_bif_type(is_float, [_]) -> float; +inferred_bif_type(is_integer, [_]) -> t_integer(); +inferred_bif_type(is_list, [_]) -> list; +inferred_bif_type(is_map, [_]) -> map; +inferred_bif_type(is_number, [_]) -> number; +inferred_bif_type(is_tuple, [_]) -> #t_tuple{}; +inferred_bif_type(abs, [_]) -> number; +inferred_bif_type(bit_size, [_]) -> {binary,1}; +inferred_bif_type(byte_size, [_]) -> {binary,1}; +inferred_bif_type(ceil, [_]) -> number; +inferred_bif_type(float, [_]) -> number; +inferred_bif_type(floor, [_]) -> number; +inferred_bif_type(round, [_]) -> number; +inferred_bif_type(trunc, [_]) -> number; +inferred_bif_type(tuple_size, [_]) -> #t_tuple{}; +inferred_bif_type(_, _) -> any. + +infer_tuple_size(#b_set{op={bif,tuple_size},args=[#b_var{name=Tuple}]}, + #b_literal{val=Size}) when is_integer(Size) -> + [{Tuple,#t_tuple{exact=true,size=Size}}]; +infer_tuple_size(_, _) -> []. + +infer_first_element(#b_set{op=get_tuple_element, + args=[#b_var{name=Tuple},#b_literal{val=0}]}, + #b_literal{val=First}) -> + [{Tuple,#t_tuple{size=1,elements=[First]}}]; +infer_first_element(_, _) -> []. + +is_math_bif(cos, 1) -> true; +is_math_bif(cosh, 1) -> true; +is_math_bif(sin, 1) -> true; +is_math_bif(sinh, 1) -> true; +is_math_bif(tan, 1) -> true; +is_math_bif(tanh, 1) -> true; +is_math_bif(acos, 1) -> true; +is_math_bif(acosh, 1) -> true; +is_math_bif(asin, 1) -> true; +is_math_bif(asinh, 1) -> true; +is_math_bif(atan, 1) -> true; +is_math_bif(atanh, 1) -> true; +is_math_bif(erf, 1) -> true; +is_math_bif(erfc, 1) -> true; +is_math_bif(exp, 1) -> true; +is_math_bif(log, 1) -> true; +is_math_bif(log2, 1) -> true; +is_math_bif(log10, 1) -> true; +is_math_bif(sqrt, 1) -> true; +is_math_bif(atan2, 2) -> true; +is_math_bif(pow, 2) -> true; +is_math_bif(ceil, 1) -> true; +is_math_bif(floor, 1) -> true; +is_math_bif(fmod, 2) -> true; +is_math_bif(pi, 0) -> true; +is_math_bif(_, _) -> false. + +join_types(Ts0, Ts1) -> + if + map_size(Ts0) < map_size(Ts1) -> + join_types_1(maps:keys(Ts0), Ts1, Ts0); + true -> + join_types_1(maps:keys(Ts1), Ts0, Ts1) + end. + +join_types_1([V|Vs], Ts0, Ts1) -> + case {Ts0,Ts1} of + {#{V:=Same},#{V:=Same}} -> + join_types_1(Vs, Ts0, Ts1); + {#{V:=T0},#{V:=T1}} -> + case join(T0, T1) of + T1 -> + join_types_1(Vs, Ts0, Ts1); + T -> + join_types_1(Vs, Ts0, Ts1#{V:=T}) + end; + {#{},#{V:=_}} -> + join_types_1(Vs, Ts0, Ts1) + end; +join_types_1([], Ts0, Ts1) -> + maps:merge(Ts0, Ts1). + +join([T1,T2|Ts]) -> + join([join(T1, T2)|Ts]); +join([T]) -> T. + +get_literal_from_type(#t_atom{elements=[Atom]}) -> + #b_literal{val=Atom}; +get_literal_from_type(#t_integer{elements={Int,Int}}) -> + #b_literal{val=Int}; +get_literal_from_type(nil) -> + #b_literal{val=[]}; +get_literal_from_type(_) -> none. + +t_atom() -> + #t_atom{elements=any}. + +t_atom(Atom) when is_atom(Atom) -> + #t_atom{elements=[Atom]}. + +t_boolean() -> + #t_atom{elements=[false,true]}. + +t_integer() -> + #t_integer{elements=any}. + +t_integer(Int) when is_integer(Int) -> + #t_integer{elements={Int,Int}}. + +t_integer(Min, Max) when is_integer(Min), is_integer(Max) -> + #t_integer{elements={Min,Max}}. + +t_is_boolean(#t_atom{elements=[F,T]}) -> + F =:= false andalso T =:= true; +t_is_boolean(#t_atom{elements=[B]}) -> + is_boolean(B); +t_is_boolean(_) -> false. + +t_tuple_size(#t_tuple{size=Size,exact=false}) -> + {at_least,Size}; +t_tuple_size(#t_tuple{size=Size,exact=true}) -> + {exact,Size}; +t_tuple_size(_) -> + none. + +%% join(Type1, Type2) -> Type +%% Return the "join" of Type1 and Type2. The join is a more general +%% type than Type1 and Type2. For example: +%% +%% join(#t_integer{elements=any}, #t_integer=elements={0,3}}) -> +%% #t_integer{} +%% +%% The join for two different types result in 'any', which is +%% the top element for our type lattice: +%% +%% join(#t_integer{}, map) -> any + +-spec join(type(), type()) -> type(). + +join(T, T) -> + verified_type(T); +join(none, T) -> + verified_type(T); +join(T, none) -> + verified_type(T); +join(any, _) -> any; +join(_, any) -> any; +join(#t_atom{elements=[_|_]=Set1}, #t_atom{elements=[_|_]=Set2}) -> + Set = ordsets:union(Set1, Set2), + case ordsets:size(Set) of + Size when Size =< ?ATOM_SET_SIZE -> + #t_atom{elements=Set}; + _Size -> + #t_atom{elements=any} + end; +join(#t_atom{elements=any}=T, #t_atom{elements=[_|_]}) -> T; +join(#t_atom{elements=[_|_]}, #t_atom{elements=any}=T) -> T; +join({binary,U1}, {binary,U2}) -> + {binary,gcd(U1, U2)}; +join(#t_integer{}, #t_integer{}) -> t_integer(); +join(list, cons) -> list; +join(cons, list) -> list; +join(nil, cons) -> list; +join(cons, nil) -> list; +join(nil, list) -> list; +join(list, nil) -> list; +join(#t_integer{}, float) -> number; +join(float, #t_integer{}) -> number; +join(#t_integer{}, number) -> number; +join(number, #t_integer{}) -> number; +join(float, number) -> number; +join(number, float) -> number; +join(#t_tuple{size=Sz,exact=Exact1}, #t_tuple{size=Sz,exact=Exact2}) -> + Exact = Exact1 and Exact2, + #t_tuple{size=Sz,exact=Exact}; +join(#t_tuple{size=Sz1}, #t_tuple{size=Sz2}) -> + #t_tuple{size=min(Sz1, Sz2)}; +join(_T1, _T2) -> + %%io:format("~p ~p\n", [_T1,_T2]), + any. + +gcd(A, B) -> + case A rem B of + 0 -> B; + X -> gcd(B, X) + end. + +meet_types([{V,T0}|Vs], Ts) -> + #{V:=T1} = Ts, + T = meet(T0, T1), + meet_types(Vs, Ts#{V:=T}); +meet_types([], Ts) -> Ts. + +meet([T1,T2|Ts]) -> + meet([meet(T1, T2)|Ts]); +meet([T]) -> T. + +%% meet(Type1, Type2) -> Type +%% Return the "meet" of Type1 and Type2. The meet is a narrower +%% type than Type1 and Type2. For example: +%% +%% meet(#t_integer{elements=any}, #t_integer{elements={0,3}}) -> +%% #t_integer{elements={0,3}} +%% +%% The meet for two different types result in 'none', which is +%% the bottom element for our type lattice: +%% +%% meet(#t_integer{}, map) -> none + +-spec meet(type(), type()) -> type(). + +meet(T, T) -> + verified_type(T); +meet(#t_atom{elements=[_|_]=Set1}, #t_atom{elements=[_|_]=Set2}) -> + case ordsets:intersection(Set1, Set2) of + [] -> + none; + [_|_]=Set -> + #t_atom{elements=Set} + end; +meet(#t_atom{elements=[_|_]}=T, #t_atom{elements=any}) -> + T; +meet(#t_atom{elements=any}, #t_atom{elements=[_|_]}=T) -> + T; +meet(#t_integer{elements={_,_}}=T, #t_integer{elements=any}) -> + T; +meet(#t_integer{elements=any}, #t_integer{elements={_,_}}=T) -> + T; +meet(#t_integer{elements={Min1,Max1}}, + #t_integer{elements={Min2,Max2}}) -> + #t_integer{elements={max(Min1, Min2),min(Max1, Max2)}}; +meet(#t_integer{}=T, number) -> T; +meet(float, number) -> float; +meet(#t_integer{}=T, number) -> T; +meet(float, number) -> float; +meet(number, #t_integer{}=T) -> T; +meet(#t_integer{}=T, number) -> T; +meet(number, float=T) -> T; +meet(float=T, number) -> T; +meet(list, cons) -> cons; +meet(list, nil) -> nil; +meet(cons, list) -> cons; +meet(nil, list) -> nil; +meet(#t_tuple{}=T1, #t_tuple{}=T2) -> + meet_tuples(T1, T2); +meet({binary,U1}, {binary,U2}) -> + {binary,max(U1, U2)}; +meet(any, T) -> + verified_type(T); +meet(T, any) -> + verified_type(T); +meet(_, _) -> + %% Inconsistent types. There will be an exception at runtime. + none. + +meet_tuples(#t_tuple{elements=[E1]}, #t_tuple{elements=[E2]}) + when E1 =/= E2 -> + none; +meet_tuples(#t_tuple{size=Sz1,exact=true}, + #t_tuple{size=Sz2,exact=true}) when Sz1 =/= Sz2 -> + none; +meet_tuples(#t_tuple{size=Sz1,exact=Ex1,elements=Es1}, + #t_tuple{size=Sz2,exact=Ex2,elements=Es2}) -> + Size = max(Sz1, Sz2), + Exact = Ex1 or Ex2, + Es = case {Es1,Es2} of + {[],[_|_]} -> Es2; + {[_|_],[]} -> Es1; + {_,_} -> Es1 + end, + #t_tuple{size=Size,exact=Exact,elements=Es}. + +%% verified_type(Type) -> Type +%% Returns the passed in type if it is one of the defined types. +%% Crashes if there is anything wrong with the type. +%% +%% Here are all possible types: +%% +%% any Any Erlang term (top element for the type lattice). +%% +%% #t_atom{} Any atom or some specific atoms. +%% {binary,Unit} Binary/bitstring aligned to unit Unit. +%% float Floating point number. +%% #t_integer{} Integer +%% list Empty or nonempty list. +%% map Map. +%% nil Empty list. +%% cons Cons (nonempty list). +%% number A number (float or integer). +%% #t_tuple{} Tuple. +%% +%% none No type (bottom element for the type lattice). + +-spec verified_type(T) -> T when + T :: type(). + +verified_type(any=T) -> T; +verified_type(none=T) -> T; +verified_type(#t_atom{elements=any}=T) -> T; +verified_type(#t_atom{elements=[_|_]}=T) -> T; +verified_type({binary,U}=T) when is_integer(U) -> T; +verified_type(#t_integer{elements=any}=T) -> T; +verified_type(#t_integer{elements={Min,Max}}=T) + when is_integer(Min), is_integer(Max) -> T; +verified_type(list=T) -> T; +verified_type(map=T) -> T; +verified_type(nil=T) -> T; +verified_type(cons=T) -> T; +verified_type(number=T) -> T; +verified_type(#t_tuple{}=T) -> T; +verified_type(float=T) -> T. diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index af1e719206..686d314c2d 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -193,8 +193,7 @@ bif_to_test('=:=', [C,A], Fail) when ?is_const(C) -> bif_to_test('=:=', [_,_]=Ops, Fail) -> {test,is_eq_exact,Fail,Ops}; bif_to_test('=/=', [C,A], Fail) when ?is_const(C) -> {test,is_ne_exact,Fail,[A,C]}; -bif_to_test('=/=', [_,_]=Ops, Fail) -> {test,is_ne_exact,Fail,Ops}; -bif_to_test(is_record, [_,_,_]=Ops, Fail) -> {test,is_record,Fail,Ops}. +bif_to_test('=/=', [_,_]=Ops, Fail) -> {test,is_ne_exact,Fail,Ops}. %% is_pure_test({test,Op,Fail,Ops}) -> true|false. diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index 5fdea23a26..3a835cfb2f 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -31,6 +31,9 @@ %% Erlc interface. -export([compile/3,compile_beam/3,compile_asm/3,compile_core/3]). +%% Utility functions for compiler passes. +-export([run_sub_passes/2]). + -export_type([option/0]). -include("erl_compile.hrl"). @@ -39,6 +42,8 @@ -import(lists, [member/2,reverse/1,reverse/2,keyfind/3,last/1, map/2,flatmap/2,foreach/2,foldr/3,any/2]). +-define(SUB_PASS_TIMES, compile__sub_pass_times). + %%---------------------------------------------------------------------- -type abstract_code() :: [erl_parse:abstract_form()]. @@ -64,6 +69,7 @@ -type err_ret() :: 'error' | {'error', errors(), warnings()}. -type comp_ret() :: mod_ret() | bin_ret() | err_ret(). + %%---------------------------------------------------------------------- %% @@ -143,6 +149,30 @@ noenv_output_generated(Opts) -> env_compiler_options() -> env_default_opts(). + +%%% +%%% Run sub passes from a compiler pass. +%%% + +-spec run_sub_passes([term()], term()) -> term(). + +run_sub_passes(Ps, St) -> + case get(?SUB_PASS_TIMES) of + undefined -> + Runner = fun(_Name, Run, S) -> Run(S) end, + run_sub_passes_1(Ps, Runner, St); + Times when is_list(Times) -> + Runner = fun(Name, Run, S0) -> + T1 = erlang:monotonic_time(), + S = Run(S0), + T2 = erlang:monotonic_time(), + put(?SUB_PASS_TIMES, + [{Name,T2-T1}|get(?SUB_PASS_TIMES)]), + S + end, + run_sub_passes_1(Ps, Runner, St) + end. + %% %% Local functions %% @@ -219,22 +249,24 @@ expand_opt(report, Os) -> expand_opt(return, Os) -> [return_errors,return_warnings|Os]; expand_opt(r16, Os) -> - [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os]; + expand_opt_before_21(Os); expand_opt(r17, Os) -> - [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os]; + expand_opt_before_21(Os); expand_opt(r18, Os) -> - [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os]; + expand_opt_before_21(Os); expand_opt(r19, Os) -> - [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os]; + expand_opt_before_21(Os); expand_opt(r20, Os) -> - [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os]; + expand_opt_before_21(Os); +expand_opt(r21, Os) -> + Os; expand_opt({debug_info_key,_}=O, Os) -> [encrypt_debug_info,O|Os]; -expand_opt(no_float_opt, Os) -> - %%Turn off the entire type optimization pass. - [no_topt|Os]; expand_opt(O, Os) -> [O|Os]. +expand_opt_before_21(Os) -> + [no_get_hd_tl,no_ssa_opt_record,no_utf8_atoms|Os]. + %% format_error(ErrorDescriptor) -> string() -spec format_error(term()) -> iolist(). @@ -387,17 +419,57 @@ fold_comp([{Name,Pass}|Ps], Run, Code0, St0) -> end; fold_comp([], _Run, Code, St) -> {ok,Code,St}. +run_sub_passes_1([{Name,Run}|Ps], Runner, St0) + when is_atom(Name), is_function(Run, 1) -> + try Runner(Name, Run, St0) of + St -> + run_sub_passes_1(Ps, Runner, St) + catch + C:E:Stk -> + io:format("Sub pass ~s\n", [Name]), + erlang:raise(C, E, Stk) + end; +run_sub_passes_1([], _, St) -> St. + run_tc({Name,Fun}, Code, St) -> + put(?SUB_PASS_TIMES, []), T1 = erlang:monotonic_time(), Val = (catch Fun(Code, St)), T2 = erlang:monotonic_time(), - Elapsed = erlang:convert_time_unit(T2 - T1, native, millisecond), + Times = erase(?SUB_PASS_TIMES), + Elapsed = erlang:convert_time_unit(T2 - T1, native, microsecond), Mem0 = erts_debug:flat_size(Val)*erlang:system_info(wordsize), Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])), io:format(" ~-30s: ~10.3f s ~12s\n", - [Name,Elapsed/1000,Mem]), + [Name,Elapsed/1000000,Mem]), + print_times(Times, Name), Val. +print_times(Times0, Name) -> + Fam0 = sofs:relation(Times0), + Fam1 = sofs:rel2fam(Fam0), + Fam2 = sofs:to_external(Fam1), + Fam3 = [{W,lists:sum(Times)} || {W,Times} <- Fam2], + Fam = reverse(lists:keysort(2, Fam3)), + Total = case lists:sum([T || {_,T} <- Fam]) of + 0 -> 1; + Total0 -> Total0 + end, + case Fam of + [] -> + ok; + [_|_] -> + io:format(" %% Sub passes of ~s from slowest to fastest:\n", [Name]), + print_times_1(Fam, Total) + end. + +print_times_1([{Name,T}|Ts], Total) -> + Elapsed = erlang:convert_time_unit(T, native, microsecond), + io:format(" ~-27s: ~10.3f s ~3w %\n", + [Name,Elapsed/1000000,round(100*T/Total)]), + print_times_1(Ts, Total); +print_times_1([], _Total) -> ok. + run_eprof({Name,Fun}, Code, Name, St) -> io:format("~p: Running eprof\n", [Name]), c:appcall(tools, eprof, start_profiling, [[self()]]), @@ -741,8 +813,20 @@ kernel_passes() -> ?pass(v3_kernel), {iff,dkern,{listing,"kernel"}}, {iff,'to_kernel',{done,"kernel"}}, - {pass,v3_codegen}, - {iff,dcg,{listing,"codegen"}} + {pass,beam_kernel_to_ssa}, + {iff,dssa,{listing,"ssa"}}, + {iff,ssalint,{pass,beam_ssa_lint}}, + {unless,no_ssa_opt,{pass,beam_ssa_opt}}, + {iff,dssaopt,{listing,"ssaopt"}}, + {iff,ssalint,{pass,beam_ssa_lint}}, + {unless,no_recv_opt,{pass,beam_ssa_recv}}, + {iff,drecv,{listing,"recv"}}, + {pass,beam_ssa_pre_codegen}, + {iff,dprecg,{listing,"precodegen"}}, + {iff,ssalint,{pass,beam_ssa_lint}}, + {pass,beam_ssa_codegen}, + {iff,dcg,{listing,"codegen"}}, + {iff,doldcg,{listing,"codegen"}} | asm_passes()]. asm_passes() -> @@ -1958,8 +2042,15 @@ pre_load() -> beam_except, beam_flatten, beam_jump, + beam_kernel_to_ssa, beam_opcodes, beam_peep, + beam_ssa, + beam_ssa_codegen, + beam_ssa_opt, + beam_ssa_pre_codegen, + beam_ssa_recv, + beam_ssa_type, beam_split, beam_trim, beam_utils, @@ -1980,7 +2071,6 @@ pre_load() -> sys_core_bsm, sys_core_dsetel, sys_core_fold, - v3_codegen, v3_core, v3_kernel], _ = code:ensure_modules_loaded(L), diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src index 9fa5a1c6ea..74529f7fef 100644 --- a/lib/compiler/src/compiler.app.src +++ b/lib/compiler/src/compiler.app.src @@ -33,9 +33,18 @@ beam_except, beam_flatten, beam_jump, + beam_kernel_to_ssa, beam_listing, beam_opcodes, beam_peep, + beam_ssa, + beam_ssa_codegen, + beam_ssa_lint, + beam_ssa_opt, + beam_ssa_pp, + beam_ssa_pre_codegen, + beam_ssa_recv, + beam_ssa_type, beam_split, beam_trim, beam_utils, @@ -61,7 +70,6 @@ sys_core_fold_lists, sys_core_inline, sys_pre_attributes, - v3_codegen, v3_core, v3_kernel, v3_kernel_pp diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl index aef0b6cc9f..fe8e252e5a 100644 --- a/lib/compiler/src/v3_kernel.erl +++ b/lib/compiler/src/v3_kernel.erl @@ -2043,9 +2043,6 @@ get_match(#k_cons{}, St0) -> get_match(#k_binary{}, St0) -> {[V]=Mes,St1} = new_vars(1, St0), {#k_binary{segs=V},Mes,St1}; -get_match(#k_bin_seg{size=#k_atom{val=all},next={k_bin_end,[]}}=Seg, St0) -> - {[S]=Vars,St1} = new_vars(1, St0), - {Seg#k_bin_seg{seg=S,next=[]},Vars,St1}; get_match(#k_bin_seg{}=Seg, St0) -> {[S,N0],St1} = new_vars(2, St0), N = set_kanno(N0, [no_usage]), @@ -2073,9 +2070,6 @@ new_clauses(Cs0, U, St) -> #k_cons{hd=H,tl=T} -> [H,T|As]; #k_tuple{es=Es} -> Es ++ As; #k_binary{segs=E} -> [E|As]; - #k_bin_seg{size=#k_atom{val=all}, - seg=S,next={k_bin_end,[]}} -> - [S|As]; #k_bin_seg{seg=S,next=N} -> [S,N|As]; #k_bin_int{next=N} -> @@ -2374,9 +2368,10 @@ uexpr(#k_try{anno=A,arg=A0,vars=Vs,body=B0,evars=Evs,handler=H0}, true -> {[#k_var{name=X}],#k_var{name=X}} = {Vs,B0}, %Assertion. #k_atom{val=false} = H0, %Assertion. - {A1,Bu,St1} = uexpr(A0, Br, St0), + {Avs,St1} = new_vars(length(Rs0), St0), + {A1,Bu,St} = uexpr(A0, {break,Avs}, St1), {#k_protected{anno=#k{us=Bu,ns=lit_list_vars(Rs0),a=A}, - arg=A1,ret=Rs0},Bu,St1}; + arg=A1,ret=Rs0,inner=Avs},Bu,St}; false -> {Avs,St1} = new_vars(length(Vs), St0), {A1,Au,St2} = ubody(A0, {break,Avs}, St1), diff --git a/lib/compiler/src/v3_kernel.hrl b/lib/compiler/src/v3_kernel.hrl index e6f0d3c1f7..e26360a6da 100644 --- a/lib/compiler/src/v3_kernel.hrl +++ b/lib/compiler/src/v3_kernel.hrl @@ -66,7 +66,7 @@ -record(k_receive_next, {anno=[]}). -record(k_try, {anno=[],arg,vars,body,evars,handler,ret=[]}). -record(k_try_enter, {anno=[],arg,vars,body,evars,handler}). --record(k_protected, {anno=[],arg,ret=[]}). +-record(k_protected, {anno=[],arg,ret=[],inner}). -record(k_catch, {anno=[],body,ret=[]}). -record(k_guard_match, {anno=[],vars,body,ret=[]}). diff --git a/lib/compiler/test/Makefile b/lib/compiler/test/Makefile index da5d207db9..2a5004aa4c 100644 --- a/lib/compiler/test/Makefile +++ b/lib/compiler/test/Makefile @@ -14,6 +14,7 @@ MODULES= \ beam_except_SUITE \ beam_jump_SUITE \ beam_reorder_SUITE \ + beam_ssa_SUITE \ beam_type_SUITE \ beam_utils_SUITE \ bif_SUITE \ @@ -52,6 +53,7 @@ NO_OPT= \ beam_except \ beam_jump \ beam_reorder \ + beam_ssa \ beam_type \ beam_utils \ bif \ @@ -75,6 +77,7 @@ INLINE= \ andor \ apply \ beam_block \ + beam_ssa \ beam_utils \ bif \ bs_bincomp \ @@ -124,7 +127,7 @@ RELSYSDIR = $(RELEASE_PATH)/compiler_test # ---------------------------------------------------- ERL_MAKE_FLAGS += -ERL_COMPILE_FLAGS += +clint +clint0 +ERL_COMPILE_FLAGS += +clint +clint0 +ssalint EBIN = . @@ -135,7 +138,8 @@ EBIN = . make_emakefile: $(NO_OPT_ERL_FILES) $(POST_OPT_ERL_FILES) $(INLINE_ERL_FILES) $(ERL_TOP)/make/make_emakefile $(ERL_COMPILE_FLAGS) -o$(EBIN) $(MODULES) \ > $(EMAKEFILE) - $(ERL_TOP)/make/make_emakefile +no_copt +no_postopt $(ERL_COMPILE_FLAGS) \ + $(ERL_TOP)/make/make_emakefile +no_copt +no_postopt \ + +no_ssa_opt +no_recv_opt $(ERL_COMPILE_FLAGS) \ -o$(EBIN) $(NO_OPT_MODULES) >> $(EMAKEFILE) $(ERL_TOP)/make/make_emakefile +no_copt $(ERL_COMPILE_FLAGS) \ -o$(EBIN) $(POST_OPT_MODULES) >> $(EMAKEFILE) diff --git a/lib/compiler/test/beam_ssa_SUITE.erl b/lib/compiler/test/beam_ssa_SUITE.erl new file mode 100644 index 0000000000..0356c99c5a --- /dev/null +++ b/lib/compiler/test/beam_ssa_SUITE.erl @@ -0,0 +1,290 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(beam_ssa_SUITE). + +-export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1, + init_per_group/2,end_per_group/2, + calls/1,tuple_matching/1,recv/1,maps/1]). + +suite() -> [{ct_hooks,[ts_install_cth]}]. + +all() -> + [{group,p}]. + +groups() -> + [{p,test_lib:parallel(), + [tuple_matching, + calls, + recv, + maps + ]}]. + +init_per_suite(Config) -> + test_lib:recompile(?MODULE), + Config. + +end_per_suite(_Config) -> + ok. + +init_per_group(_GroupName, Config) -> + Config. + +end_per_group(_GroupName, Config) -> + Config. + +calls(Config) -> + Ret = {return,value,Config}, + Ret = fun_call(fun(42) -> ok end, Ret), + Ret = apply_fun(fun(a, b) -> ok end, [a,b], Ret), + Ret = apply_mfa(test_lib, id, [anything], Ret), + {'EXIT',{badarg,_}} = (catch call_error()), + {'EXIT',{badarg,_}} = (catch call_error(42)), + 5 = start_it([erlang,length,1,2,3,4,5]), + ok. + +fun_call(Fun, X0) -> + X = id(X0), + Fun(42), + X. + +apply_fun(Fun, Args, X0) -> + X = id(X0), + apply(Fun, Args), + X. + +apply_mfa(Mod, Name, Args, X0) -> + X = id(X0), + apply(Mod, Name, Args), + X. + +call_error() -> + error(badarg), + ok. + +call_error(I) -> + <>, + ok. + +start_it([_|_]=MFA) -> + case MFA of + [M,F|Args] -> M:F(Args) + end. + +tuple_matching(_Config) -> + do_tuple_matching({tag,42}). + +do_tuple_matching(Arg) -> + Res = do_tuple_matching_1(Arg), + Res = do_tuple_matching_2(Arg), + Res = do_tuple_matching_3(Arg), + Res. + +do_tuple_matching_1({tag,V}) -> + {ok,V}. + +do_tuple_matching_2(Tuple) when is_tuple(Tuple) -> + Size = tuple_size(Tuple), + if + Size =:= 2 -> + {ok,element(2, Tuple)} + end. + +do_tuple_matching_3(Tuple) when is_tuple(Tuple) -> + Size = tuple_size(Tuple), + if + Size =:= 2 -> + 2 = id(Size), + {ok,element(2, Tuple)} + end. + +-record(reporter_state, {res,run_config}). +-record(run_config, {report_interval=0}). + +recv(_Config) -> + Parent = self(), + + %% Test sync_wait_mon/2. + Succ = fun() -> Parent ! {ack,self(),{result,42}} end, + {result,42} = sync_wait_mon(spawn_monitor(Succ), infinity), + + Down = fun() -> exit(down) end, + {error,down} = sync_wait_mon(spawn_monitor(Down), infinity), + + Exit = fun() -> + Self = self(), + spawn(fun() -> exit(Self, kill_me) end), + receive _ -> ok end + end, + {error,kill_me} = sync_wait_mon(spawn_monitor(Exit), infinity), + + Timeout = fun() -> receive _ -> ok end end, + {error,timeout} = sync_wait_mon(spawn_monitor(Timeout), 0), + + %% Test reporter_loop/1. + {a,Parent} = reporter_loop(#reporter_state{res={a,Parent}, + run_config=#run_config{}}), + + %% Test bad_sink/0. + bad_sink(), + + %% Test tricky_recv_1/0. + self() ! 1, + a = tricky_recv_1(), + self() ! 2, + b = tricky_recv_1(), + + %% Test tricky_recv_2/0. + self() ! 1, + {1,yes} = tricky_recv_2(), + self() ! 2, + {2,maybe} = tricky_recv_2(), + + %% Test 'receive after infinity' in try/catch. + Pid = spawn(fun recv_after_inf_in_try/0), + exit(Pid, done), + + %% Test tricky_recv_3(). + self() ! {{self(),r0},{1,42,"name"}}, + {Parent,r0,[<<1:32,1:8,42:8>>,"name",0]} = tricky_recv_3(), + self() ! {{self(),r1},{2,99,<<"data">>}}, + {Parent,r1,<<1:32,2:8,99:8,"data">>} = tricky_recv_3(), + + %% Test tricky_recv_4(). + self() ! {[self(),r0],{1,42,"name"}}, + {Parent,r0,[<<1:32,1:8,42:8>>,"name",0]} = tricky_recv_4(), + self() ! {[self(),r1],{2,99,<<"data">>}}, + {Parent,r1,<<1:32,2:8,99:8,"data">>} = tricky_recv_4(), + + ok. + +sync_wait_mon({Pid, Ref}, Timeout) -> + receive + {ack,Pid,Return} -> + erlang:demonitor(Ref, [flush]), + Return; + {'DOWN',Ref,_Type,Pid,Reason} -> + {error,Reason}; + {'EXIT',Pid,Reason} -> + erlang:demonitor(Ref, [flush]), + {error,Reason} + after Timeout -> + erlang:demonitor(Ref, [flush]), + exit(Pid, kill), + {error,timeout} + end. + +reporter_loop(State) -> + RC = State#reporter_state.run_config, + receive after RC#run_config.report_interval -> + State#reporter_state.res + end. + +bad_sink() -> + {ok,Pid} = my_spawn(self()), + %% The get_tuple_element instruction for the matching + %% above was sinked into the receive loop. That will + %% not work (and would be bad for performance if it + %% would work). + receive + {ok,Pid} -> + ok; + error -> + exit(failed) + end, + exit(Pid, kill). + +my_spawn(Parent) -> + Pid = spawn(fun() -> + Parent ! {ok,self()}, + receive _ -> ok end + end), + {ok,Pid}. + +tricky_recv_1() -> + receive + X=1 -> + id(42), + a; + X=2 -> + b + end, + case X of + 1 -> a; + 2 -> b + end. + +tricky_recv_2() -> + receive + X=1 -> + Y = case id(X) of + 1 -> yes; + _ -> no + end, + a; + X=2 -> + Y = maybe, + b + end, + {X,Y}. + +recv_after_inf_in_try() -> + try + %% Used to crash beam_kernel_to_ssa. + receive after infinity -> ok end + catch + _A:_B -> + receive after infinity -> ok end + end. + +tricky_recv_3() -> + {Pid, R, Request} = + receive + {{Pid0,R0}, {1, Proto0, Name0}} -> + {Pid0, R0, + [<<1:32, 1:8, Proto0:8>>,Name0,0]}; + {{Pid1,R1}, {2, Proto1, Data1}} -> + {Pid1, R1, + <<1:32, 2:8, Proto1:8, Data1/binary>>} + end, + id({Pid,R,Request}). + +tricky_recv_4() -> + {Pid, R, Request} = + receive + {[Pid0,R0], {1, Proto0, Name0}} -> + {Pid0, R0, + [<<1:32, 1:8, Proto0:8>>,Name0,0]}; + {[Pid1,R1], {2, Proto1, Data1}} -> + {Pid1, R1, + <<1:32, 2:8, Proto1:8, Data1/binary>>} + end, + id({Pid,R,Request}). + +maps(_Config) -> + {'EXIT',{{badmatch,#{}},_}} = (catch maps_1(any)), + ok. + +maps_1(K) -> + _ = id(42), + #{K:=V} = #{}, + V. + +%% The identity function. +id(I) -> I. diff --git a/lib/compiler/test/beam_type_SUITE.erl b/lib/compiler/test/beam_type_SUITE.erl index 061076b3ff..caf43dad40 100644 --- a/lib/compiler/test/beam_type_SUITE.erl +++ b/lib/compiler/test/beam_type_SUITE.erl @@ -23,7 +23,8 @@ init_per_group/2,end_per_group/2, integers/1,coverage/1,booleans/1,setelement/1,cons/1, tuple/1,record_float/1,binary_float/1,float_compare/1, - arity_checks/1,elixir_binaries/1,find_best/1]). + arity_checks/1,elixir_binaries/1,find_best/1, + test_size/1]). suite() -> [{ct_hooks,[ts_install_cth]}]. @@ -43,7 +44,8 @@ groups() -> float_compare, arity_checks, elixir_binaries, - find_best + find_best, + test_size ]}]. init_per_suite(Config) -> @@ -177,11 +179,50 @@ setelement(_Config) -> cons(_Config) -> [did] = cons(assigned, did), + + true = cons_is_empty_list([]), + false = cons_is_empty_list([a]), + + false = cons_not(true), + true = cons_not(false), + + {$a,"bc"} = cons_hdtl(true), + {$d,"ef"} = cons_hdtl(false), ok. cons(assigned, Instrument) -> [Instrument] = [did]. +cons_is_empty_list(L) -> + Cons = case L of + [] -> "true"; + _ -> "false" + end, + id(1), + case Cons of + "true" -> true; + "false" -> false + end. + +cons_not(B) -> + Cons = case B of + true -> "true"; + false -> "false" + end, + id(1), + case Cons of + "true" -> false; + "false" -> true + end. + +cons_hdtl(B) -> + Cons = case B of + true -> "abc"; + false -> "def" + end, + id(1), + {id(hd(Cons)),id(tl(Cons))}. + tuple(_Config) -> {'EXIT',{{badmatch,{necessary}},_}} = (catch do_tuple()), ok. @@ -320,6 +361,15 @@ find_best([], <<"a">>) -> find_best([], nil) -> {error,<<"should not get here">>}. +test_size(Config) -> + 2 = do_test_size({a,b}), + 4 = do_test_size(<<42:32>>), + ok. + +do_test_size(Term) when is_tuple(Term) -> + size(Term); +do_test_size(Term) when is_binary(Term) -> + size(Term). id(I) -> I. diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl index 7814738449..b4277f0705 100644 --- a/lib/compiler/test/bs_match_SUITE.erl +++ b/lib/compiler/test/bs_match_SUITE.erl @@ -248,6 +248,12 @@ bin_tail(Config) when is_list(Config) -> ok = bin_tail_e(<<2:2,1:1,1:5,42:64>>), error = bin_tail_e(<<3:2,1:1,1:5,42:64>>), error = bin_tail_e(<<>>), + + MD5 = erlang:md5(<<42>>), + <<"abc">> = bin_tail_f(<>, MD5, 3), + error = bin_tail_f(<>, MD5, 999), + {'EXIT',{_,_}} = (catch bin_tail_f(<<0:16/unit:8>>, MD5, 0)), + ok. bin_tail_c(Bin, Offset) -> @@ -304,6 +310,14 @@ bin_tail_e_var(Bin) -> <<2:2,_:1,1:5,Tail/binary>> -> Tail; _ -> error end. + +bin_tail_f(Bin, MD5, Size) -> + case Bin of + <> -> + Tail; + <> -> + error + end. save_restore(Config) when is_list(Config) -> 0 = save_restore_1(<<0:2,42:6>>), @@ -455,6 +469,15 @@ unit(Config) when is_list(Config) -> 127 = peek7(<<127:7>>), 100 = peek7(<<100:7,19:7>>), fc(peek7, [<<1,2>>], catch peek7(<<1,2>>)), + + 1 = unit_opt(1, -1), + 8 = unit_opt(8, -1), + + <<1:32,"abc">> = unit_opt_2(<<1:32,"abc">>), + <<"def">> = unit_opt_2(<<2:32,"def">>), + {'EXIT',_} = (catch unit_opt_2(<<1:32,33:7>>)), + {'EXIT',_} = (catch unit_opt_2(<<2:32,55:7>>)), + ok. peek1(<>) -> B. @@ -465,6 +488,27 @@ peek8(<>) -> B. peek16(<>) -> B. +unit_opt(U, X) -> + %% Cover type analysis in beam_ssa_type. + Bin = case U of + 1 -> <>; + 8 -> <> + end, + %% The type of Bin will be set to {binary,gcd(1, 8)}. + case Bin of + <<_/binary-unit:8>> -> 8; + <<_/binary-unit:1>> -> 1 + end. + +unit_opt_2(<> = Bin0) -> + Bin = if + St =:= 1 -> + Bin0; + St =:= 2 -> + <> + end, + id(Bin). + shared_sub_bins(Config) when is_list(Config) -> {15,[<<>>,<<5>>,<<4,5>>,<<3,4,5>>,<<2,3,4,5>>]} = sum(<<1,2,3,4,5>>, [], 0), ok. diff --git a/lib/compiler/test/compilation_SUITE.erl b/lib/compiler/test/compilation_SUITE.erl index 3ba3ce7cdf..def7a60f45 100644 --- a/lib/compiler/test/compilation_SUITE.erl +++ b/lib/compiler/test/compilation_SUITE.erl @@ -170,7 +170,7 @@ try_it(Module, Conf) -> atom_to_list(Module)), Out = proplists:get_value(priv_dir,Conf), io:format("Compiling: ~s\n", [Src]), - CompRc0 = compile:file(Src, [clint0,clint,{outdir,Out},report, + CompRc0 = compile:file(Src, [clint0,clint,ssalint,{outdir,Out},report, bin_opt_info|OtherOpts]), io:format("Result: ~p\n",[CompRc0]), {ok,_Mod} = CompRc0, @@ -189,7 +189,7 @@ try_it(Module, Conf) -> ct:timetrap(Timetrap), io:format("Compiling (with old inliner): ~s\n", [Src]), - CompRc2 = compile:file(Src, [clint, + CompRc2 = compile:file(Src, [clint,ssalint, {outdir,Out},report,bin_opt_info, {inline,1000}|OtherOpts]), io:format("Result: ~p\n",[CompRc2]), @@ -355,7 +355,7 @@ compile_compiler(Files, OutDir, Version, InlineOpts) -> io:format("~ts", [code:which(compile)]), io:format("Compiling ~s into ~ts", [Version,OutDir]), Opts = [report, - clint0,clint, + clint0,clint,ssalint, bin_opt_info, {outdir,OutDir}, {d,'COMPILER_VSN',"\""++Version++"\""}, diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl index 8056982d8e..85f0b7dc46 100644 --- a/lib/compiler/test/compile_SUITE.erl +++ b/lib/compiler/test/compile_SUITE.erl @@ -385,6 +385,9 @@ do_file_listings(DataDir, PrivDir, [File|Files]) -> do_listing(Simple, TargetDir, dcbsm, ".core_bsm"), do_listing(Simple, TargetDir, dsetel, ".dsetel"), do_listing(Simple, TargetDir, dkern, ".kernel"), + do_listing(Simple, TargetDir, dssa, ".ssa"), + do_listing(Simple, TargetDir, dssaopt, ".ssaopt"), + do_listing(Simple, TargetDir, dprecg, ".precodegen"), do_listing(Simple, TargetDir, dcg, ".codegen"), do_listing(Simple, TargetDir, dblk, ".block"), do_listing(Simple, TargetDir, dexcept, ".except"), @@ -423,6 +426,9 @@ listings_big(Config) when is_list(Config) -> do_listing(Big, TargetDir, 'E'), do_listing(Big, TargetDir, 'P'), do_listing(Big, TargetDir, dkern, ".kernel"), + do_listing(Big, TargetDir, dssa, ".ssa"), + do_listing(Big, TargetDir, dssaopt, ".ssaopt"), + do_listing(Big, TargetDir, dprecg, ".precodegen"), do_listing(Big, TargetDir, to_dis, ".dis"), TargetNoext = filename:rootname(Target, code:objfile_extension()), @@ -919,7 +925,7 @@ do_core_pp_1(M, A, Outdir) -> ok = file:delete(CoreFile), %% Compile as usual (including optimizations). - compile_forms(M, Core, [clint,from_core,binary]), + compile_forms(M, Core, [clint,ssalint,from_core,binary]), %% Don't optimize to test that we are not dependent %% on the Core Erlang optmimization passes. @@ -928,7 +934,7 @@ do_core_pp_1(M, A, Outdir) -> %% records; if sys_core_fold was run it would fix %% that; if sys_core_fold was not run v3_kernel would %% crash.) - compile_forms(M, Core, [clint,from_core,no_copt,binary]), + compile_forms(M, Core, [clint,ssalint,from_core,no_copt,binary]), ok. @@ -1241,21 +1247,14 @@ do_opt_guards_fun([_|Is]) -> do_opt_guards_fun(Is); do_opt_guards_fun([]) -> []. -is_exception(bs_match_SUITE, {matching_and_andalso_2,2}) -> true; -is_exception(bs_match_SUITE, {matching_and_andalso_3,2}) -> true; is_exception(guard_SUITE, {'-complex_not/1-fun-4-',1}) -> true; is_exception(guard_SUITE, {'-complex_not/1-fun-5-',1}) -> true; -is_exception(guard_SUITE, {basic_andalso_orelse,1}) -> true; is_exception(guard_SUITE, {bad_guards,1}) -> true; is_exception(guard_SUITE, {bad_guards_2,2}) -> true; is_exception(guard_SUITE, {bad_guards_3,2}) -> true; -is_exception(guard_SUITE, {cqlc,4}) -> true; is_exception(guard_SUITE, {csemi7,3}) -> true; -is_exception(guard_SUITE, {misc,1}) -> true; is_exception(guard_SUITE, {nested_not_2b,4}) -> true; is_exception(guard_SUITE, {tricky_1,2}) -> true; -is_exception(map_SUITE, {map_guard_update,2}) -> true; -is_exception(map_SUITE, {map_guard_update_variables,3}) -> true; is_exception(_, _) -> false. sys_pre_attributes(Config) -> @@ -1477,18 +1476,18 @@ bc_options(Config) -> 101 = highest_opcode(DataDir, small_float, [no_get_hd_tl,no_line_info]), 103 = highest_opcode(DataDir, big, - [no_get_hd_tl,no_record_opt, + [no_get_hd_tl,no_ssa_opt_record, no_line_info,no_stack_trimming]), 125 = highest_opcode(DataDir, small_float, - [no_get_hd_tl,no_line_info,no_float_opt]), + [no_get_hd_tl,no_line_info,no_ssa_opt_float]), 132 = highest_opcode(DataDir, small, - [no_get_hd_tl,no_record_opt,no_float_opt,no_line_info]), + [no_get_hd_tl,no_ssa_opt_record,no_ssa_opt_float,no_line_info]), - 136 = highest_opcode(DataDir, big, [no_get_hd_tl,no_record_opt,no_line_info]), + 136 = highest_opcode(DataDir, big, [no_get_hd_tl,no_ssa_opt_record,no_line_info]), - 153 = highest_opcode(DataDir, big, [no_get_hd_tl,no_record_opt]), + 153 = highest_opcode(DataDir, big, [no_get_hd_tl,no_ssa_opt_record]), 153 = highest_opcode(DataDir, big, [r16]), 153 = highest_opcode(DataDir, big, [r17]), 153 = highest_opcode(DataDir, big, [r18]), diff --git a/lib/compiler/test/core_fold_SUITE.erl b/lib/compiler/test/core_fold_SUITE.erl index 30d145582a..68bc5c6e2f 100644 --- a/lib/compiler/test/core_fold_SUITE.erl +++ b/lib/compiler/test/core_fold_SUITE.erl @@ -373,7 +373,7 @@ unused_multiple_values_error(Config) when is_list(Config) -> PrivDir = proplists:get_value(priv_dir, Config), Dir = test_lib:get_data_dir(Config), Core = filename:join(Dir, "unused_multiple_values_error"), - Opts = [no_copt,clint,return,from_core,{outdir,PrivDir} + Opts = [no_copt,clint,ssalint,return,from_core,{outdir,PrivDir} |test_lib:opt_opts(?MODULE)], {error,[{unused_multiple_values_error, [{none,core_lint,{return_mismatch,{hello,1}}}]}], diff --git a/lib/compiler/test/guard_SUITE.erl b/lib/compiler/test/guard_SUITE.erl index 99dc06b525..73a8dc0fda 100644 --- a/lib/compiler/test/guard_SUITE.erl +++ b/lib/compiler/test/guard_SUITE.erl @@ -1788,6 +1788,11 @@ t_tuple_size(Config) when is_list(Config) -> 14 = Mod:t({1,2,3,4}), _ = code:delete(Mod), _ = code:purge(Mod), + + good_ip({1,2,3,4}), + good_ip({1,2,3,4,5,6,7,8}), + error = validate_ip({42,11}), + error = validate_ip(atom), ok. @@ -1805,6 +1810,16 @@ ludicrous_tuple_size(T) when tuple_size(T) =:= 16#FFFFFFFFFFFFFFFF -> ok; ludicrous_tuple_size(_) -> error. +good_ip(IP) -> + IP = validate_ip(IP). + +validate_ip(Value) when is_tuple(Value) andalso + ((size(Value) =:= 4) orelse (size(Value) =:= 8)) -> + %% size/1 (converted to tuple_size) used more than once. + Value; +validate_ip(_) -> + error. + %% %% The binary_part/2,3 guard BIFs %% diff --git a/lib/compiler/test/inline_SUITE.erl b/lib/compiler/test/inline_SUITE.erl index fdf2fe88b4..e03769012b 100644 --- a/lib/compiler/test/inline_SUITE.erl +++ b/lib/compiler/test/inline_SUITE.erl @@ -96,7 +96,8 @@ try_inline(Mod, Config) -> %% Normal compilation. io:format("Compiling: ~s\n", [Src]), - {ok,Mod} = compile:file(Src, [{outdir,Out},report,bin_opt_info,clint]), + {ok,Mod} = compile:file(Src, [{outdir,Out},report, + bin_opt_info,clint,ssalint]), ct:timetrap({minutes,10}), NormalResult = rpc:call(Node, ?MODULE, load_and_call, [Out,Mod]), @@ -104,7 +105,7 @@ try_inline(Mod, Config) -> %% Inlining. io:format("Compiling with old inliner: ~s\n", [Src]), {ok,Mod} = compile:file(Src, [{outdir,Out},report,bin_opt_info, - {inline,1000},clint]), + {inline,1000},clint,ssalint]), %% Run inlined code. ct:timetrap({minutes,10}), @@ -117,7 +118,7 @@ try_inline(Mod, Config) -> %% Inlining. io:format("Compiling with new inliner: ~s\n", [Src]), {ok,Mod} = compile:file(Src, [{outdir,Out},report, - bin_opt_info,inline,clint]), + bin_opt_info,inline,clint,ssalint]), %% Run inlined code. ct:timetrap({minutes,10}), @@ -351,7 +352,8 @@ otp_7223_2({a}) -> coverage(Config) when is_list(Config) -> Mod = bsdecode, Src = filename:join(proplists:get_value(data_dir, Config), Mod), - {ok,Mod,_} = compile:file(Src, [binary,report,{inline,0},clint]), + {ok,Mod,_} = compile:file(Src, [binary,report,{inline,0}, + clint,ssalint]), {ok,Mod,_} = compile:file(Src, [binary,report,{inline,20}, - verbose,clint]), + verbose,clint,ssalint]), ok. diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl index 497cbad636..beae5eb618 100644 --- a/lib/compiler/test/misc_SUITE.erl +++ b/lib/compiler/test/misc_SUITE.erl @@ -171,7 +171,7 @@ silly_coverage(Config) when is_list(Config) -> expect_error(fun() -> sys_core_dsetel:module(BadCoreErlang, []) end), expect_error(fun() -> v3_kernel:module(BadCoreErlang, []) end), - %% v3_codegen + %% beam_kernel_to_ssa BadKernel = {k_mdef,[],?MODULE, [{foo,0}], [], @@ -179,7 +179,31 @@ silly_coverage(Config) when is_list(Config) -> {k,[],[],[]}, f,0,[], seriously_bad_body}]}, - expect_error(fun() -> v3_codegen:module(BadKernel, []) end), + expect_error(fun() -> beam_kernel_to_ssa:module(BadKernel, []) end), + + %% beam_ssa_lint + %% beam_ssa_recv + %% beam_ssa_pre_codegen + %% beam_ssa_opt + %% beam_ssa_codegen + BadSSA = {b_module,#{},a,b,c, + [{b_function,#{func_info=>{mod,foo,0}},args,bad_blocks,0}]}, + expect_error(fun() -> beam_ssa_lint:module(BadSSA, []) end), + expect_error(fun() -> beam_ssa_recv:module(BadSSA, []) end), + expect_error(fun() -> beam_ssa_pre_codegen:module(BadSSA, []) end), + expect_error(fun() -> beam_ssa_opt:module(BadSSA, []) end), + expect_error(fun() -> beam_ssa_codegen:module(BadSSA, []) end), + + %% beam_ssa_lint, beam_ssa_pp + {error,[{_,Errors}]} = beam_ssa_lint:module(bad_ssa_lint_input(), []), + _ = [io:put_chars(Mod:format_error(Reason)) || + {Mod,Reason} <- Errors], + + %% Cover printing of annotations in beam_ssa_pp + PPAnno = #{func_info=>{mod,foo,0},other_anno=>value,map_anno=>#{k=>v}}, + PPBlocks = #{0=>{b_blk,#{},[],{b_ret,#{},{b_literal,42}}}}, + PP = {b_function,PPAnno,[],PPBlocks,0}, + io:put_chars(beam_ssa_pp:format_function(PP)), %% beam_a BeamAInput = {?MODULE,[{foo,0}],[], @@ -268,6 +292,31 @@ silly_coverage(Config) when is_list(Config) -> ok. +bad_ssa_lint_input() -> + {b_module,#{},t, + [{foobar,1},{module_info,0},{module_info,1}], + [], + [{b_function, + #{func_info => {t,foobar,1},location => {"t.erl",4}}, + [{b_var,0}], + #{0 => {b_blk,#{},[],{b_ret,#{},{b_var,'@undefined_var'}}}}, + 3}, + {b_function, + #{func_info => {t,module_info,0}}, + [], + #{0 => + {b_blk,#{}, + [{b_set,#{}, + {b_var,{'@ssa_ret',3}}, + call, + [{b_remote, + {b_literal,erlang}, + {b_literal,get_module_info}, + 1}, + {b_var,'@unknown_variable'}]}], + {b_ret,#{},{b_var,{'@ssa_ret',3}}}}}, + 4}]}. + expect_error(Fun) -> try Fun() of Any -> diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/no_5.erl b/lib/compiler/test/receive_SUITE_data/ref_opt/no_5.erl new file mode 100644 index 0000000000..4fbde3a83d --- /dev/null +++ b/lib/compiler/test/receive_SUITE_data/ref_opt/no_5.erl @@ -0,0 +1,38 @@ +-module(no_5). +-compile([export_all,nowarn_export_all]). + +?MODULE() -> + ok. + +%% Nested receives were not handled properly. + +confusing_recv_mark(Pid) -> + Ref = make_ref(), + %% There would be a recv_mark here. + MRef = erlang:monitor(process, Pid), + receive + Ref -> + %% And a recv_set here. + receive + MRef -> gurka + end; + MRef -> + gaffel + end. + +%% The optimization could potentially be improved to +%% handle matching of multiple refs, like this: + +proper_recv_mark(Pid) -> + %% Place the recv_mark before the creation of both refs. + Ref = make_ref(), + MRef = erlang:monitor(process, Pid), + %% Place the recv_set here. + receive + Ref -> + receive + MRef -> gurka + end; + MRef -> + gaffel + end. diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.S b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.S deleted file mode 100644 index fd14228135..0000000000 --- a/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.S +++ /dev/null @@ -1,71 +0,0 @@ -{module, yes_14}. %% version = 0 - -{exports, [{f,2},{module_info,0},{module_info,1},{yes_14,0}]}. - -{attributes, []}. - -{labels, 12}. - - -{function, yes_14, 0, 2}. - {label,1}. - {func_info,{atom,yes_14},{atom,yes_14},0}. - {label,2}. - {move,{atom,ok},{x,0}}. - return. - - -{function, f, 2, 4}. - {label,3}. - {func_info,{atom,yes_14},{atom,f},2}. - {label,4}. - {allocate_heap,2,3,2}. - {move,{x,0},{y,1}}. - {put_tuple,2,{y,0}}. - {put,{atom,data}}. - {put,{x,1}}. - {call_ext,0,{extfunc,erlang,make_ref,0}}. % Ref in [x0] - {test_heap,4,1}. - {put_tuple,3,{x,1}}. - {put,{atom,request}}. - {put,{x,0}}. - {put,{y,0}}. - {move,{x,0},{y,0}}. % Ref in [x0,y0] - {move,{y,1},{x,0}}. % Ref in [y0] - {kill,{y,1}}. - send. - {move,{y,0},{x,0}}. % Ref in [x0,y0] - {move,{x,0},{y,1}}. % Ref in [x0,y0,y1] - {label,5}. - {loop_rec,{f,7},{x,0}}. % Ref in [y0,y1] - {test,is_tuple,{f,6},[{x,0}]}. - {test,test_arity,{f,6},[{x,0},2]}. - {get_tuple_element,{x,0},0,{x,1}}. - {get_tuple_element,{x,0},1,{x,2}}. - {test,is_eq_exact,{f,6},[{x,1},{atom,reply}]}. - {test,is_eq_exact,{f,6},[{x,2},{y,1}]}. - remove_message. - {move,{atom,ok},{x,0}}. - {deallocate,2}. - return. - {label,6}. - {loop_rec_end,{f,5}}. - {label,7}. - {wait,{f,5}}. - - -{function, module_info, 0, 9}. - {label,8}. - {func_info,{atom,yes_14},{atom,module_info},0}. - {label,9}. - {move,{atom,yes_14},{x,0}}. - {call_ext_only,1,{extfunc,erlang,get_module_info,1}}. - - -{function, module_info, 1, 11}. - {label,10}. - {func_info,{atom,yes_14},{atom,module_info},1}. - {label,11}. - {move,{x,0},{x,1}}. - {move,{atom,yes_14},{x,0}}. - {call_ext_only,2,{extfunc,erlang,get_module_info,2}}. diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.erl b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.erl new file mode 100644 index 0000000000..aa47c02af9 --- /dev/null +++ b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.erl @@ -0,0 +1,27 @@ +-module(yes_14). +-compile(export_all). + +?MODULE() -> + ok. + +do_call(Process, Request) -> + Mref = erlang:monitor(process, Process), + Process ! Request, + Local = case node(Process) of + Node when Node =:= node() -> true; + _Node -> false + end, + id(Local), + receive + {X,Y,Z} when Mref =/= X, Z =:= 42, Mref =:= Y -> + error; + {X,Y,_} when Mref =/= X, Mref =:= Y -> + error; + {Mref, Reply} -> + erlang:demonitor(Mref, [flush]), + {ok, Reply}; + {'DOWN', Mref, _, _, _} -> + error + end. + +id(I) -> I. diff --git a/lib/compiler/test/test_lib.erl b/lib/compiler/test/test_lib.erl index 8954a9f5fb..c6baa611ec 100644 --- a/lib/compiler/test/test_lib.erl +++ b/lib/compiler/test/test_lib.erl @@ -72,11 +72,9 @@ opt_opts(Mod) -> {options,Opts} = lists:keyfind(options, 1, Comp), lists:filter(fun(no_copt) -> true; (no_postopt) -> true; - (no_float_opt) -> true; - (no_new_funs) -> true; - (no_new_binaries) -> true; - (no_new_apply) -> true; - (no_gc_bifs) -> true; + (no_ssa_opt) -> true; + (no_recv_opt) -> true; + (no_ssa_float) -> true; (no_stack_trimming) -> true; (debug_info) -> true; (inline) -> true; -- cgit v1.2.3 From 05d9c45e7295243f897f3e423ee4540d4c6d9bc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 16 Aug 2018 11:50:40 +0200 Subject: Remove retired compiler passes --- lib/compiler/src/Makefile | 1 - lib/compiler/src/beam_receive.erl | 416 ------ lib/compiler/src/beam_record.erl | 131 -- lib/compiler/src/beam_reorder.erl | 150 -- lib/compiler/src/beam_type.erl | 1117 -------------- lib/compiler/src/v3_codegen.erl | 2925 ------------------------------------- 6 files changed, 4740 deletions(-) delete mode 100644 lib/compiler/src/beam_receive.erl delete mode 100644 lib/compiler/src/beam_record.erl delete mode 100644 lib/compiler/src/beam_reorder.erl delete mode 100644 lib/compiler/src/beam_type.erl delete mode 100644 lib/compiler/src/v3_codegen.erl (limited to 'lib/compiler') diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile index f036a2d8dc..bd35f20442 100644 --- a/lib/compiler/src/Makefile +++ b/lib/compiler/src/Makefile @@ -211,7 +211,6 @@ $(EBIN)/sys_core_dsetel.beam: core_parse.hrl $(EBIN)/sys_core_fold.beam: core_parse.hrl $(EBIN)/sys_core_fold_lists.beam: core_parse.hrl $(EBIN)/sys_core_inline.beam: core_parse.hrl -$(EBIN)/v3_codegen.beam: v3_kernel.hrl $(EBIN)/v3_core.beam: core_parse.hrl $(EBIN)/v3_kernel.beam: core_parse.hrl v3_kernel.hrl $(EBIN)/v3_kernel_pp.beam: v3_kernel.hrl diff --git a/lib/compiler/src/beam_receive.erl b/lib/compiler/src/beam_receive.erl deleted file mode 100644 index ddbe67605a..0000000000 --- a/lib/compiler/src/beam_receive.erl +++ /dev/null @@ -1,416 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2010-2018. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% - --module(beam_receive). --export([module/2]). --import(lists, [foldl/3,reverse/1,reverse/2]). - -%%% -%%% In code such as: -%%% -%%% Ref = make_ref(), %Or erlang:monitor(process, Pid) -%%% . -%%% . -%%% . -%%% receive -%%% {Ref,Reply} -> Reply -%%% end. -%%% -%%% we know that none of the messages that exist in the message queue -%%% before the call to make_ref/0 can be matched out in the receive -%%% statement. Therefore we can avoid going through the entire message -%%% queue if we introduce two new instructions (here written as -%%% BIFs in pseudo-Erlang): -%%% -%%% recv_mark(SomeUniqInteger), -%%% Ref = make_ref(), -%%% . -%%% . -%%% . -%%% recv_set(SomeUniqInteger), -%%% receive -%%% {Ref,Reply} -> Reply -%%% end. -%%% -%%% The recv_mark/1 instruction will save the current position and -%%% SomeUniqInteger in the process context. The recv_set -%%% instruction will verify that SomeUniqInteger is still stored -%%% in the process context. If it is, it will set the current pointer -%%% for the message queue (the next message to be read out) to the -%%% position that was saved by recv_mark/1. -%%% -%%% The remove_message instruction must be modified to invalidate -%%% the information stored by the previous recv_mark/1, in case there -%%% is another receive executed between the calls to recv_mark/1 and -%%% recv_set/1. -%%% -%%% We use a reference to a label (i.e. a position in the loaded code) -%%% as the SomeUniqInteger. -%%% - --spec module(beam_utils:module_code(), [compile:option()]) -> - {'ok',beam_utils:module_code()}. - -module({Mod,Exp,Attr,Fs0,Lc}, _Opts) -> - Fs = [function(F) || F <- Fs0], - Code = {Mod,Exp,Attr,Fs,Lc}, - {ok,Code}. - -%%% -%%% Local functions. -%%% - -function({function,Name,Arity,Entry,Is}) -> - try - D = beam_utils:index_labels(Is), - {function,Name,Arity,Entry,opt(Is, D, [])} - catch - Class:Error:Stack -> - io:fwrite("Function: ~w/~w\n", [Name,Arity]), - erlang:raise(Class, Error, Stack) - end. - -opt([{call_ext,A,{extfunc,erlang,spawn_monitor,A}}=I0|Is0], D, Acc) - when A =:= 1; A =:= 3 -> - case ref_in_tuple(Is0) of - no -> - opt(Is0, D, [I0|Acc]); - {yes,Regs,Is1,MatchReversed} -> - %% The call creates a brand new reference. Now - %% search for a receive statement in the same - %% function that will match against the reference. - case opt_recv(Is1, Regs, D) of - no -> - opt(Is0, D, [I0|Acc]); - {yes,Is,Lbl} -> - opt(Is, D, MatchReversed++[I0,{recv_mark,{f,Lbl}}|Acc]) - end - end; -opt([{call_ext,Arity,{extfunc,erlang,Name,Arity}}=I|Is0], D, Acc) -> - case creates_new_ref(Name, Arity) of - true -> - %% The call creates a brand new reference. Now - %% search for a receive statement in the same - %% function that will match against the reference. - case opt_recv(Is0, regs_init_x0(), D) of - no -> - opt(Is0, D, [I|Acc]); - {yes,Is,Lbl} -> - opt(Is, D, [I,{recv_mark,{f,Lbl}}|Acc]) - end; - false -> - opt(Is0, D, [I|Acc]) - end; -opt([I|Is], D, Acc) -> - opt(Is, D, [I|Acc]); -opt([], _, Acc) -> - reverse(Acc). - -ref_in_tuple([{test,is_tuple,_,[{x,0}]}=I1, - {test,test_arity,_,[{x,0},2]}=I2, - {block,[{set,[_],[{x,0}],{get_tuple_element,0}}, - {set,[Dst],[{x,0}],{get_tuple_element,1}}|Bl]}=I3|Is]) -> - ref_in_tuple_1(Bl, Dst, Is, [I3,I2,I1]); -ref_in_tuple([{test,is_tuple,_,[{x,0}]}=I1, - {test,test_arity,_,[{x,0},2]}=I2, - {block,[{set,[Dst],[{x,0}],{get_tuple_element,1}}|Bl]}=I3|Is]) -> - ref_in_tuple_1(Bl, Dst, Is, [I3,I2,I1]); -ref_in_tuple(_) -> no. - -ref_in_tuple_1(Bl, Dst, Is, MatchReversed) -> - Regs0 = regs_init_singleton(Dst), - Regs = opt_update_regs_bl(Bl, Regs0), - {yes,Regs,Is,MatchReversed}. - -%% creates_new_ref(Name, Arity) -> true|false. -%% Return 'true' if the BIF Name/Arity will create a new reference. -creates_new_ref(monitor, 2) -> true; -creates_new_ref(make_ref, 0) -> true; -creates_new_ref(_, _) -> false. - -%% opt_recv([Instruction], Regs, LabelIndex) -> no|{yes,[Instruction]} -%% Search for a receive statement that will only retrieve messages -%% that contain the newly created reference (which is currently in {x,0}). -opt_recv(Is, Regs, D) -> - L = gb_sets:empty(), - opt_recv(Is, D, Regs, L, []). - -opt_recv([{label,L}=Lbl,{loop_rec,{f,Fail},_}=Loop|Is], D, R0, _, Acc) -> - R = regs_kill_not_live(0, R0), - case regs_empty(R) of - false -> - %% We now have the new reference in Y registers - %% and the current instruction is the beginning of a - %% receive statement. We must now verify that only messages - %% that contain the reference will be matched. - case opt_ref_used(Is, R, Fail, D) of - false -> - no; - true -> - RecvSet = {recv_set,{f,L}}, - {yes,reverse(Acc, [RecvSet,Lbl,Loop|Is]),L} - end; - true -> - no - end; -opt_recv([I|Is], D, R0, L0, Acc) -> - {R,L} = opt_update_regs(I, R0, L0), - case regs_empty(R) of - true -> - %% The reference is no longer alive. There is no - %% point in continuing the search. - no; - false -> - opt_recv(Is, D, R, L, [I|Acc]) - end; -opt_recv([], _, _, _, _) -> no. - -opt_update_regs({block,Bl}, R, L) -> - {opt_update_regs_bl(Bl, R),L}; -opt_update_regs({call,_,_}, R, L) -> - {regs_kill_not_live(0, R),L}; -opt_update_regs({call_ext,_,_}, R, L) -> - {regs_kill_not_live(0, R),L}; -opt_update_regs({call_fun,_}, R, L) -> - {regs_kill_not_live(0, R),L}; -opt_update_regs({kill,Y}, R, L) -> - {regs_kill([Y], R),L}; -opt_update_regs({'catch',_,{f,Lbl}}, R, L) -> - {R,gb_sets:add(Lbl, L)}; -opt_update_regs({catch_end,_}, R, L) -> - {R,L}; -opt_update_regs({label,Lbl}, R, L) -> - case gb_sets:is_member(Lbl, L) of - false -> - %% We can't allow arbitrary labels (since the receive - %% could be entered without first creating the reference). - {regs_init(),L}; - true -> - %% A catch label for a previously seen catch instruction is OK. - {R,L} - end; -opt_update_regs({'try',_,{f,Lbl}}, R, L) -> - {R,gb_sets:add(Lbl, L)}; -opt_update_regs({try_end,_}, R, L) -> - {R,L}; -opt_update_regs({line,_}, R, L) -> - {R,L}; -opt_update_regs(_I, _R, L) -> - %% Unrecognized instruction. Abort the search. - {regs_init(),L}. - -opt_update_regs_bl([{set,Ds,_,{alloc,Live,_}}|Is], Regs0) -> - Regs1 = regs_kill_not_live(Live, Regs0), - Regs = regs_kill(Ds, Regs1), - opt_update_regs_bl(Is, Regs); -opt_update_regs_bl([{set,[Dst]=Ds,[Src],move}|Is], Regs0) -> - Regs1 = regs_kill(Ds, Regs0), - Regs = case regs_is_member(Src, Regs1) of - false -> Regs1; - true -> regs_add(Dst, Regs1) - end, - opt_update_regs_bl(Is, Regs); -opt_update_regs_bl([{set,Ds,_,_}|Is], Regs0) -> - Regs = regs_kill(Ds, Regs0), - opt_update_regs_bl(Is, Regs); -opt_update_regs_bl([], Regs) -> Regs. - -%% opt_ref_used([Instruction], RefRegs, FailLabel, LabelIndex) -> true|false -%% Return 'true' if it is certain that only messages that contain the same -%% reference as in RefRegs can be matched out. Otherwise return 'false'. -%% -%% Basically, we follow all possible paths through the receive statement. -%% If all paths are safe, we return 'true'. -%% -%% A branch to FailLabel is safe, because it exits the receive statement -%% and no further message may be matched out. -%% -%% If a path hits an comparision between RefRegs and part of the message, -%% that path is safe (any messages that may be matched further down the -%% path is guaranteed to contain the reference). -%% -%% Otherwise, if we hit a 'remove_message' instruction, we give up -%% and return 'false' (the optimization is definitely unsafe). If -%% we hit an unrecognized instruction, we also give up and return -%% 'false' (the optimization may be unsafe). - -opt_ref_used(Is, RefRegs, Fail, D) -> - Done = gb_sets:singleton(Fail), - Regs = regs_init_x0(), - try - _ = opt_ref_used_1(Is, RefRegs, D, Done, Regs), - true - catch - throw:not_used -> - false - end. - -%% This functions only returns if all paths through the receive -%% statement are safe, and throws an 'not_used' term otherwise. -opt_ref_used_1([{block,Bl}|Is], RefRegs, D, Done, Regs0) -> - Regs = opt_ref_used_bl(Bl, Regs0), - opt_ref_used_1(Is, RefRegs, D, Done, Regs); -opt_ref_used_1([{test,is_eq_exact,{f,Fail},Args}|Is], - RefRegs, D, Done0, Regs) -> - Done = opt_ref_used_at(Fail, RefRegs, D, Done0, Regs), - case is_ref_msg_comparison(Args, RefRegs, Regs) of - false -> - opt_ref_used_1(Is, RefRegs, D, Done, Regs); - true -> - %% The instructions that follow (Is) can only be executed - %% if the message contains the same reference as in RefRegs. - Done - end; -opt_ref_used_1([{test,is_ne_exact,{f,Fail},Args}|Is], - RefRegs, D, Done0, Regs) -> - Done = opt_ref_used_1(Is, RefRegs, D, Done0, Regs), - case is_ref_msg_comparison(Args, RefRegs, Regs) of - false -> - opt_ref_used_at(Fail, RefRegs, D, Done, Regs); - true -> - Done - end; -opt_ref_used_1([{test,_,{f,Fail},_}|Is], RefRegs, D, Done0, Regs) -> - Done = opt_ref_used_at(Fail, RefRegs, D, Done0, Regs), - opt_ref_used_1(Is, RefRegs, D, Done, Regs); -opt_ref_used_1([{select,_,_,{f,Fail},List}|_], RefRegs, D, Done, Regs) -> - Lbls = [F || {f,F} <- List] ++ [Fail], - opt_ref_used_in_all(Lbls, RefRegs, D, Done, Regs); -opt_ref_used_1([{label,Lbl}|Is], RefRegs, D, Done, Regs) -> - case gb_sets:is_member(Lbl, Done) of - true -> Done; - false -> opt_ref_used_1(Is, RefRegs, D, Done, Regs) - end; -opt_ref_used_1([{loop_rec_end,_}|_], _, _, Done, _) -> - Done; -opt_ref_used_1([_I|_], _RefReg, _D, _Done, _Regs) -> - %% The optimization may be unsafe. - throw(not_used). - -%% is_ref_msg_comparison(Args, RefRegs, RegisterSet) -> true|false. -%% Return 'true' if Args denotes a comparison between the -%% reference and message or part of the message. -is_ref_msg_comparison([R1,R2], RefRegs, Regs) -> - (regs_is_member(R2, RefRegs) andalso regs_is_member(R1, Regs)) orelse - (regs_is_member(R1, RefRegs) andalso regs_is_member(R2, Regs)). - -opt_ref_used_in_all([L|Ls], RefRegs, D, Done0, Regs) -> - Done = opt_ref_used_at(L, RefRegs, D, Done0, Regs), - opt_ref_used_in_all(Ls, RefRegs, D, Done, Regs); -opt_ref_used_in_all([], _, _, Done, _) -> Done. - -opt_ref_used_at(Fail, RefRegs, D, Done0, Regs) -> - case gb_sets:is_member(Fail, Done0) of - true -> - Done0; - false -> - Is = beam_utils:code_at(Fail, D), - Done = opt_ref_used_1(Is, RefRegs, D, Done0, Regs), - gb_sets:add(Fail, Done) - end. - -opt_ref_used_bl([{set,[],[],remove_message}|_], _) -> - %% We have proved that a message that does not depend on the - %% reference can be matched out. - throw(not_used); -opt_ref_used_bl([{set,Ds,Ss,_}|Is], Regs0) -> - case regs_all_members(Ss, Regs0) of - false -> - %% The destination registers may be assigned values that - %% are not dependent on the message being matched. - Regs = regs_kill(Ds, Regs0), - opt_ref_used_bl(Is, Regs); - true -> - %% All the sources depend on the message directly or - %% indirectly. - Regs = regs_add_list(Ds, Regs0), - opt_ref_used_bl(Is, Regs) - end; -opt_ref_used_bl([], Regs) -> Regs. - -%%% -%%% Functions for keeping track of a set of registers. -%%% - -%% regs_init() -> RegisterSet -%% Return an empty set of registers. - -regs_init() -> - {0,0}. - -%% regs_init_singleton(Register) -> RegisterSet -%% Return a set that only contains one register. - -regs_init_singleton(Reg) -> - regs_add(Reg, regs_init()). - -%% regs_init_x0() -> RegisterSet -%% Return a set that only contains the {x,0} register. - -regs_init_x0() -> - {1 bsl 0,0}. - -%% regs_empty(Register) -> true|false -%% Test whether the register set is empty. - -regs_empty(R) -> - R =:= {0,0}. - -%% regs_kill_not_live(Live, RegisterSet) -> RegisterSet' -%% Kill all registers indicated not live by Live. - -regs_kill_not_live(Live, {Xregs,Yregs}) -> - {Xregs band ((1 bsl Live)-1),Yregs}. - -%% regs_kill([Register], RegisterSet) -> RegisterSet' -%% Kill all registers mentioned in the list of registers. - -regs_kill([{x,N}|Rs], {Xregs,Yregs}) -> - regs_kill(Rs, {Xregs band (bnot (1 bsl N)),Yregs}); -regs_kill([{y,N}|Rs], {Xregs,Yregs}) -> - regs_kill(Rs, {Xregs,Yregs band (bnot (1 bsl N))}); -regs_kill([{fr,_}|Rs], Regs) -> - regs_kill(Rs, Regs); -regs_kill([], Regs) -> Regs. - -regs_add_list(List, Regs) -> - foldl(fun(R, A) -> regs_add(R, A) end, Regs, List). - -%% regs_add(Register, RegisterSet) -> RegisterSet' -%% Add a new register to the set of registers. - -regs_add({x,N}, {Xregs,Yregs}) -> - {Xregs bor (1 bsl N),Yregs}; -regs_add({y,N}, {Xregs,Yregs}) -> - {Xregs,Yregs bor (1 bsl N)}. - -%% regs_all_members([Register], RegisterSet) -> true|false -%% Test whether all of the registers are part of the register set. - -regs_all_members([R|Rs], Regs) -> - regs_is_member(R, Regs) andalso regs_all_members(Rs, Regs); -regs_all_members([], _) -> true. - -%% regs_is_member(Register, RegisterSet) -> true|false -%% Test whether Register is part of the register set. - -regs_is_member({x,N}, {Regs,_}) -> Regs band (1 bsl N) =/= 0; -regs_is_member({y,N}, {_,Regs}) -> Regs band (1 bsl N) =/= 0; -regs_is_member(_, _) -> false. diff --git a/lib/compiler/src/beam_record.erl b/lib/compiler/src/beam_record.erl deleted file mode 100644 index 58a6de6775..0000000000 --- a/lib/compiler/src/beam_record.erl +++ /dev/null @@ -1,131 +0,0 @@ -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2014-2017. All Rights Reserved. -%% -%% The contents of this file are subject to the Erlang Public License, -%% Version 1.1, (the "License"); you may not use this file except in -%% compliance with the License. You should have received a copy of the -%% Erlang Public License along with this software. If not, it can be -%% retrieved online at http://www.erlang.org/. -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See -%% the License for the specific language governing rights and limitations -%% under the License. -%% -%% %CopyrightEnd% -%% - -%% Rewrite the instruction stream on tagged tuple tests. -%% Tagged tuples means a tuple of any arity with an atom as its -%% first element, such as records and error tuples. -%% -%% From: -%% ... -%% {test,is_tuple,Fail,[Src]}. -%% {test,test_arity,Fail,[Src,Sz]}. -%% ... -%% {get_tuple_element,Src,0,Dst}. -%% ... -%% {test,is_eq_exact,Fail,[Dst,Atom]}. -%% ... -%% To: -%% ... -%% {test,is_tagged_tuple,Fail,[Src,Sz,Atom]}. -%% ... -%% - --module(beam_record). --export([module/2]). - --import(lists, [reverse/1,reverse/2]). - --spec module(beam_utils:module_code(), [compile:option()]) -> - {'ok',beam_utils:module_code()}. - -module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> - Fs = [function(F) || F <- Fs0], - {ok,{Mod,Exp,Attr,Fs,Lc}}. - -function({function,Name,Arity,CLabel,Is0}) -> - try - Is1 = beam_utils:anno_defs(Is0), - Idx = beam_utils:index_labels(Is1), - Is = rewrite(reverse(Is1), Idx), - {function,Name,Arity,CLabel,Is} - catch - Class:Error:Stack -> - io:fwrite("Function: ~w/~w\n", [Name,Arity]), - erlang:raise(Class, Error, Stack) - end. - -rewrite(Is, Idx) -> - rewrite(Is, Idx, 0, []). - -rewrite([{test,test_arity,Fail,[Src,N]}=TA, - {test,is_tuple,Fail,[Src]}=TT|Is], Idx, Def, Acc0) -> - case is_tagged_tuple(Acc0, Def, Fail, Src, Idx) of - no -> - rewrite(Is, Idx, 0, [TT,TA|Acc0]); - {yes,Atom,Acc} -> - I = {test,is_tagged_tuple,Fail,[Src,N,Atom]}, - rewrite(Is, Idx, Def, [I|Acc]) - end; -rewrite([{block,[{'%anno',{def,Def}}|Bl]}|Is], Idx, _Def, Acc) -> - rewrite(Is, Idx, Def, [{block,Bl}|Acc]); -rewrite([{label,L}=I|Is], Idx0, Def, Acc) -> - Idx = beam_utils:index_label(L, Acc, Idx0), - rewrite(Is, Idx, Def, [I|Acc]); -rewrite([I|Is], Idx, Def, Acc) -> - rewrite(Is, Idx, Def, [I|Acc]); -rewrite([], _, _, Acc) -> Acc. - -is_tagged_tuple([{block,Bl}, - {test,is_eq_exact,Fail,[Dst,{atom,_}=Atom]}|Is], - Def, Fail, Src, Idx) -> - case is_tagged_tuple_1(Bl, Is, Fail, Src, Dst, Idx, Def, []) of - no -> - no; - {yes,[]} -> - {yes,Atom,Is}; - {yes,[_|_]=Block} -> - {yes,Atom,[{block,Block}|Is]} - end; -is_tagged_tuple(_, _, _, _, _) -> - no. - -is_tagged_tuple_1([{set,[Dst],[Src],{get_tuple_element,0}}=I|Bl], - Is, Fail, Src, Dst, Idx, Def, Acc) -> - %% Check usage of Dst to find out whether the get_tuple_element - %% is needed. - case usage(Dst, Is, Fail, Idx) of - killed -> - %% Safe to remove the get_tuple_element instruction. - {yes,reverse(Acc, Bl)}; - used -> - %% Actively used. Must keep instruction. - {yes,reverse(Acc, [I|Bl])}; - not_used -> - %% Not actually used (but must be initialized). - case is_defined(Dst, Def) of - false -> - %% Dst must be initialized, but the - %% actual value does not matter. - Kill = {set,[Dst],[nil],move}, - {yes,reverse(Acc, [Kill|Bl])}; - true -> - %% The register is previously initialized. - %% We can remove the instruction. - {yes,reverse(Acc, Bl)} - end - end; -is_tagged_tuple_1([I|Bl], Is, Fail, Src, Dst, Idx, Def, Acc) -> - is_tagged_tuple_1(Bl, Is, Fail, Src, Dst, Idx, Def, [I|Acc]); -is_tagged_tuple_1(_, _, _, _, _, _, _, _) -> - no. - -usage(Dst, Is, Fail, Idx) -> - beam_utils:usage(Dst, [{test,is_number,Fail,[nil]}|Is], Idx). - -is_defined({x,X}, Def) -> - (Def bsr X) band 1 =:= 1. diff --git a/lib/compiler/src/beam_reorder.erl b/lib/compiler/src/beam_reorder.erl deleted file mode 100644 index 8d2ef5a431..0000000000 --- a/lib/compiler/src/beam_reorder.erl +++ /dev/null @@ -1,150 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 1999-2018. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% - --module(beam_reorder). - --export([module/2]). --import(lists, [member/2,reverse/1]). - --spec module(beam_utils:module_code(), [compile:option()]) -> - {'ok',beam_utils:module_code()}. - -module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> - Fs = [function(F) || F <- Fs0], - {ok,{Mod,Exp,Attr,Fs,Lc}}. - -function({function,Name,Arity,CLabel,Is0}) -> - try - Is = reorder(Is0), - {function,Name,Arity,CLabel,Is} - catch - Class:Error:Stack -> - io:fwrite("Function: ~w/~w\n", [Name,Arity]), - erlang:raise(Class, Error, Stack) - end. - -%% reorder(Instructions0) -> Instructions -%% Reorder instructions before the beam_block pass, because reordering -%% will be more cumbersome when the blocks are in place. -%% -%% Execution of get_tuple_element instructions can be delayed until -%% they are actually needed. Consider the sequence: -%% -%% get_tuple_element Tuple Pos Dst -%% test Test Fail Operands -%% -%% If Dst is killed at label Fail (and not referenced in Operands), -%% we can can swap the instructions: -%% -%% test Test Fail Operands -%% get_tuple_element Tuple Pos Dst -%% -%% That can be beneficial in two ways: Firstly, if the branch is taken -%% we have avoided execution of the get_tuple_element instruction. -%% Secondly, even if the branch is not taken, subsequent optimization -%% (opt_blocks/1) may be able to change Dst to the final destination -%% register and eliminate a 'move' instruction. - -reorder(Is) -> - D = beam_utils:index_labels(Is), - reorder_1(Is, D, []). - -reorder_1([{Op,_,_}=TryCatch|[I|Is]=Is0], D, Acc) - when Op =:= 'catch'; Op =:= 'try' -> - %% Don't allow 'try' or 'catch' instructions to split blocks if - %% it can be avoided. - case is_safe(I) of - false -> - reorder_1(Is0, D, [TryCatch|Acc]); - true -> - reorder_1([TryCatch|Is], D, [I|Acc]) - end; -reorder_1([{label,L}=I|_], D, Acc) -> - Is = beam_utils:code_at(L, D), - reorder_1(Is, D, [I|Acc]); -reorder_1([{test,is_nonempty_list,_,_}=I|Is], D, Acc) -> - %% The run-time system may combine the is_nonempty_list test with - %% the following get_list instruction. - reorder_1(Is, D, [I|Acc]); -reorder_1([{test,_,_,_}=I, - {select,_,_,_,_}=S|Is], D, Acc) -> - %% There is nothing to gain by inserting a get_tuple_element - %% instruction between the test instruction and the select - %% instruction. - reorder_1(Is, D, [S,I|Acc]); -reorder_1([{test,_,{f,_},[Src|_]}=I|Is], D, - [{get_tuple_element,Src,_,_}|_]=Acc) -> - %% We want to avoid code that can confuse beam_validator such as: - %% is_tuple Fail Src - %% test_arity Fail Src Arity - %% is_map Fail Src - %% get_tuple_element Src Pos Dst - %% Therefore, don't reorder the instructions in such cases. - reorder_1(Is, D, [I|Acc]); -reorder_1([{test,_,{f,L},Ss}=I|Is0], D0, - [{get_tuple_element,_,_,El}=G|Acc0]=Acc) -> - case member(El, Ss) of - true -> - reorder_1(Is0, D0, [I|Acc]); - false -> - case beam_utils:is_killed_at(El, L, D0) of - true -> - Is = [I,G|Is0], - reorder_1(Is, D0, Acc0); - false -> - case beam_utils:is_killed(El, Is0, D0) of - true -> - Code0 = beam_utils:code_at(L, D0), - Code = [G|Code0], - D = beam_utils:index_label(L, Code, D0), - Is = [I|Is0], - reorder_1(Is, D, Acc0); - false -> - reorder_1(Is0, D0, [I|Acc]) - end - end - end; -reorder_1([{allocate_zero,N,Live}=I0|Is], D, - [{get_tuple_element,{x,Tup},_,{x,Dst}}=G|Acc]=Acc0) -> - case Tup < Dst andalso Dst+1 =:= Live of - true -> - %% Move allocation instruction upwards past - %% get_tuple_element instructions to create more - %% opportunities for moving get_tuple_element - %% instructions. - I = {allocate_zero,N,Dst}, - reorder_1([I,G|Is], D, Acc); - false -> - reorder_1(Is, D, [I0|Acc0]) - end; -reorder_1([I|Is], D, Acc) -> - reorder_1(Is, D, [I|Acc]); -reorder_1([], _, Acc) -> reverse(Acc). - -%% is_safe(Instruction) -> true|false -%% Test whether an instruction is safe (cannot cause an exception). - -is_safe({kill,_}) -> true; -is_safe({move,_,_}) -> true; -is_safe({put,_}) -> true; -is_safe({put_list,_,_,_}) -> true; -is_safe({put_tuple,_,_}) -> true; -is_safe({test_heap,_,_}) -> true; -is_safe(_) -> false. diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl deleted file mode 100644 index b5c979e529..0000000000 --- a/lib/compiler/src/beam_type.erl +++ /dev/null @@ -1,1117 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 1999-2018. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% -%% Purpose: Type-based optimisations. See the comment for verified_type/1 -%% the very end of this file for a description of the types in the -%% type database. - --module(beam_type). - --export([module/2]). - --import(lists, [foldl/3,member/2,reverse/1,reverse/2,sort/1]). - --define(UNICODE_INT, {integer,{0,16#10FFFF}}). - --spec module(beam_utils:module_code(), [compile:option()]) -> - {'ok',beam_utils:module_code()}. - -module({Mod,Exp,Attr,Fs0,Lc}, _Opts) -> - Fs = [function(F) || F <- Fs0], - {ok,{Mod,Exp,Attr,Fs,Lc}}. - -function({function,Name,Arity,CLabel,Asm0}) -> - try - Asm1 = beam_utils:live_opt(Asm0), - Asm2 = opt(Asm1, [], tdb_new()), - Asm3 = beam_utils:live_opt(Asm2), - Asm = beam_utils:delete_annos(Asm3), - {function,Name,Arity,CLabel,Asm} - catch - Class:Error:Stack -> - io:fwrite("Function: ~w/~w\n", [Name,Arity]), - erlang:raise(Class, Error, Stack) - end. - -%% opt([Instruction], Accumulator, TypeDb) -> {[Instruction'],TypeDb'} -%% Keep track of type information; try to simplify. - -opt([{block,Body1}|Is], [{block,Body0}|Acc], Ts0) -> - {Body2,Ts} = simplify(Body1, Ts0), - Body = merge_blocks(Body0, Body2), - opt(Is, [{block,Body}|Acc], Ts); -opt([{block,Body0}|Is], Acc, Ts0) -> - {Body,Ts} = simplify(Body0, Ts0), - opt(Is, [{block,Body}|Acc], Ts); -opt([I0|Is], Acc, Ts0) -> - case simplify_basic([I0], Ts0) of - {[],Ts} -> opt(Is, Acc, Ts); - {[I],Ts} -> opt(Is, [I|Acc], Ts) - end; -opt([], Acc, _) -> reverse(Acc). - -%% simplify(Instruction, TypeDb) -> NewInstruction -%% Simplify an instruction using type information (this is -%% technically a "strength reduction"). - -simplify(Is0, TypeDb0) -> - {Is,_} = BasicRes = simplify_basic(Is0, TypeDb0), - case simplify_float(Is, TypeDb0) of - not_possible -> BasicRes; - {_,_}=Res -> Res - end. - -%% simplify_basic([Instruction], TypeDatabase) -> {[Instruction],TypeDatabase'} -%% Basic simplification, mostly tuples, no floating point optimizations. - -simplify_basic(Is, Ts) -> - simplify_basic(Is, Ts, []). - -simplify_basic([I0|Is], Ts0, Acc) -> - case simplify_instr(I0, Ts0) of - [] -> - simplify_basic(Is, Ts0, Acc); - [I] -> - Ts = update(I, Ts0), - simplify_basic(Is, Ts, [I|Acc]) - end; -simplify_basic([], Ts, Acc) -> - {reverse(Acc),Ts}. - -%% simplify_instr(Instruction, Ts) -> [Instruction]. - -%% Simplify a simple instruction using type information. Return an -%% empty list if the instruction should be removed, or a list with -%% the original or modified instruction. - -simplify_instr({set,[D],[{integer,Index},Reg],{bif,element,_}}=I, Ts) -> - case max_tuple_size(Reg, Ts) of - Sz when 0 < Index, Index =< Sz -> - [{set,[D],[Reg],{get_tuple_element,Index-1}}]; - _ -> [I] - end; -simplify_instr({test,Test,Fail,[R]}=I, Ts) -> - case tdb_find(R, Ts) of - any -> - [I]; - Type -> - case will_succeed(Test, Type) of - yes -> []; - no -> [{jump,Fail}]; - maybe -> [I] - end - end; -simplify_instr({set,[D],[TupleReg],{get_tuple_element,0}}=I, Ts) -> - case tdb_find(TupleReg, Ts) of - {tuple,_,_,[Contents]} -> - [{set,[D],[Contents],move}]; - _ -> - [I] - end; -simplify_instr({test,test_arity,_,[R,Arity]}=I, Ts) -> - case tdb_find(R, Ts) of - {tuple,exact_size,Arity,_} -> []; - _ -> [I] - end; -simplify_instr({test,is_eq_exact,Fail,[R,{atom,A}=Atom]}=I, Ts) -> - case tdb_find(R, Ts) of - {atom,_}=Atom -> []; - boolean when is_boolean(A) -> [I]; - any -> [I]; - _ -> [{jump,Fail}] - end; -simplify_instr({test,is_record,_,[R,{atom,_}=Tag,{integer,Arity}]}=I, Ts) -> - case tdb_find(R, Ts) of - {tuple,exact_size,Arity,[Tag]} -> []; - _ -> [I] - end; -simplify_instr({select,select_val,Reg,_,_}=I, Ts) -> - [case tdb_find(Reg, Ts) of - {integer,Range} -> - simplify_select_val_int(I, Range); - boolean -> - simplify_select_val_bool(I); - _ -> - I - end]; -simplify_instr({test,bs_test_unit,_,[Src,Unit]}=I, Ts) -> - case tdb_find(Src, Ts) of - {binary,U} when U rem Unit =:= 0 -> []; - _ -> [I] - end; -simplify_instr(I, _) -> [I]. - -simplify_select_val_int({select,select_val,R,_,L0}=I, {Min,Max}) -> - Vs = sort([V || {integer,V} <- L0]), - case eq_ranges(Vs, Min, Max) of - false -> I; - true -> simplify_select_val_1(L0, {integer,Max}, R, []) - end. - -simplify_select_val_bool({select,select_val,R,_,L}=I) -> - Vs = sort([V || {atom,V} <- L]), - case Vs of - [false,true] -> - simplify_select_val_1(L, {atom,false}, R, []); - _ -> - I - end. - -simplify_select_val_1([Val,F|T], Val, R, Acc) -> - L = reverse(Acc, T), - {select,select_val,R,F,L}; -simplify_select_val_1([V,F|T], Val, R, Acc) -> - simplify_select_val_1(T, Val, R, [F,V|Acc]). - -eq_ranges([H], H, H) -> true; -eq_ranges([H|T], H, Max) -> eq_ranges(T, H+1, Max); -eq_ranges(_, _, _) -> false. - -%% will_succeed(TestOperation, Type) -> yes|no|maybe. -%% Test whether TestOperation applied to an argument of type Type -%% will succeed. Return yes, no, or maybe. -%% -%% Type is a type as described in the comment for verified_type/1 at -%% the very end of this file, but it will *never* be 'any'. - -will_succeed(is_atom, Type) -> - case Type of - {atom,_} -> yes; - boolean -> yes; - _ -> no - end; -will_succeed(is_binary, Type) -> - case Type of - {binary,U} when U rem 8 =:= 0 -> yes; - {binary,_} -> maybe; - _ -> no - end; -will_succeed(is_bitstr, Type) -> - case Type of - {binary,_} -> yes; - _ -> no - end; -will_succeed(is_integer, Type) -> - case Type of - integer -> yes; - {integer,_} -> yes; - _ -> no - end; -will_succeed(is_map, Type) -> - case Type of - map -> yes; - _ -> no - end; -will_succeed(is_nonempty_list, Type) -> - case Type of - nonempty_list -> yes; - _ -> no - end; -will_succeed(is_tuple, Type) -> - case Type of - {tuple,_,_,_} -> yes; - _ -> no - end; -will_succeed(_, _) -> maybe. - -%% simplify_float([Instruction], TypeDatabase) -> -%% {[Instruction],TypeDatabase'} | not_possible -%% Simplify floating point operations in blocks. -%% -simplify_float(Is0, Ts0) -> - {Is1,Ts} = simplify_float_1(Is0, Ts0, [], []), - Is2 = opt_fmoves(Is1, []), - Is3 = flt_need_heap(Is2), - try - {flt_liveness(Is3),Ts} - catch - throw:not_possible -> not_possible - end. - -simplify_float_1([{set,[],[],fclearerror}|Is], Ts, Rs, Acc) -> - simplify_float_1(Is, Ts, Rs, clearerror(Acc)); -simplify_float_1([{set,[],[],fcheckerror}|Is], Ts, Rs, Acc) -> - simplify_float_1(Is, Ts, Rs, checkerror(Acc)); -simplify_float_1([{set,[{fr,_}],_,_}=I|Is], Ts, Rs, Acc) -> - simplify_float_1(Is, Ts, Rs, [I|Acc]); -simplify_float_1([{set,[D0],[A0],{alloc,_,{gc_bif,'-',{f,0}}}}=I|Is]=Is0, - Ts0, Rs0, Acc0) -> - case tdb_find(A0, Ts0) of - float -> - A = coerce_to_float(A0), - {Rs1,Acc1} = load_reg(A, Ts0, Rs0, Acc0), - {D,Rs} = find_dest(D0, Rs1), - Areg = fetch_reg(A, Rs), - Acc = [{set,[D],[Areg],{bif,fnegate,{f,0}}}|clearerror(Acc1)], - Ts = tdb_store(D0, float, Ts0), - simplify_float_1(Is, Ts, Rs, Acc); - _Other -> - Ts = update(I, Ts0), - {Rs,Acc} = flush(Rs0, Is0, Acc0), - simplify_float_1(Is, Ts, Rs, [I|checkerror(Acc)]) - end; -simplify_float_1([{set,[D0],[A0,B0],{alloc,_,{gc_bif,Op0,{f,0}}}}=I|Is]=Is0, - Ts0, Rs0, Acc0) -> - case float_op(Op0, A0, B0, Ts0) of - no -> - Ts = update(I, Ts0), - {Rs,Acc} = flush(Rs0, Is0, Acc0), - simplify_float_1(Is, Ts, Rs, [I|checkerror(Acc)]); - {yes,Op} -> - A = coerce_to_float(A0), - B = coerce_to_float(B0), - {Rs1,Acc1} = load_reg(A, Ts0, Rs0, Acc0), - {Rs2,Acc2} = load_reg(B, Ts0, Rs1, Acc1), - {D,Rs} = find_dest(D0, Rs2), - Areg = fetch_reg(A, Rs), - Breg = fetch_reg(B, Rs), - Acc = [{set,[D],[Areg,Breg],{bif,Op,{f,0}}}|clearerror(Acc2)], - Ts = tdb_store(D0, float, Ts0), - simplify_float_1(Is, Ts, Rs, Acc) - end; -simplify_float_1([{set,_,_,{try_catch,_,_}}=I|Is]=Is0, _Ts, Rs0, Acc0) -> - Acc = flush_all(Rs0, Is0, Acc0), - simplify_float_1(Is, tdb_new(), Rs0, [I|Acc]); -simplify_float_1([{set,_,_,{line,_}}=I|Is], Ts, Rs, Acc) -> - simplify_float_1(Is, Ts, Rs, [I|Acc]); -simplify_float_1([I|Is], Ts0, [], Acc) -> - Ts = update(I, Ts0), - simplify_float_1(Is, Ts, [], [I|Acc]); -simplify_float_1([I|Is]=Is0, Ts0, Rs0, Acc0) -> - Ts = update(I, Ts0), - {Rs,Acc} = flush(Rs0, Is0, Acc0), - simplify_float_1(Is, Ts, Rs, [I|checkerror(Acc)]); -simplify_float_1([], Ts, [], Acc) -> - Is = reverse(Acc), - {Is,Ts}. - -coerce_to_float({integer,I}=Int) -> - try float(I) of - F -> - {float,F} - catch _:_ -> - %% Let the overflow happen at run-time. - Int - end; -coerce_to_float(Other) -> Other. - -opt_fmoves([{set,[{x,_}=R],[{fr,_}]=Src,fmove}=I1, - {set,[_]=Dst,[{x,_}=R],move}=I2|Is], Acc) -> - case beam_utils:is_killed_block(R, Is) of - false -> opt_fmoves(Is, [I2,I1|Acc]); - true -> opt_fmoves(Is, [{set,Dst,Src,fmove}|Acc]) - end; -opt_fmoves([I|Is], Acc) -> - opt_fmoves(Is, [I|Acc]); -opt_fmoves([], Acc) -> reverse(Acc). - -clearerror(Is) -> - clearerror(Is, Is). - -clearerror([{set,[],[],fclearerror}|_], OrigIs) -> OrigIs; -clearerror([{set,[],[],fcheckerror}|_], OrigIs) -> [{set,[],[],fclearerror}|OrigIs]; -clearerror([_|Is], OrigIs) -> clearerror(Is, OrigIs); -clearerror([], OrigIs) -> [{set,[],[],fclearerror}|OrigIs]. - -%% merge_blocks(Block1, Block2) -> Block. -%% Combine two blocks and eliminate any move instructions that assign -%% to registers that are killed later in the block. -%% -merge_blocks(B1, [{'%anno',_}|B2]) -> - merge_blocks_1(B1++[{set,[],[],stop_here}|B2]). - -merge_blocks_1([{set,[],_,stop_here}|Is]) -> Is; -merge_blocks_1([{set,[D],_,move}=I|Is]) -> - case beam_utils:is_killed_block(D, Is) of - true -> merge_blocks_1(Is); - false -> [I|merge_blocks_1(Is)] - end; -merge_blocks_1([I|Is]) -> [I|merge_blocks_1(Is)]. - -%% flt_need_heap([Instruction]) -> [Instruction] -%% Insert need heap allocation instructions in the instruction stream -%% to properly account for both inserted floating point operations and -%% normal term build operations (such as put_list/3). -%% -%% Ignore old heap allocation instructions (except if they allocate a stack -%% frame too), as they may be in the wrong place (because gc_bif instructions -%% could have been converted to floating point operations). - -flt_need_heap(Is) -> - flt_need_heap_1(reverse(Is), 0, 0, []). - -flt_need_heap_1([{set,[],[],{alloc,_,Alloc}}|Is], H, Fl, Acc) -> - case Alloc of - {_,nostack,_,_} -> - %% Remove any existing test_heap/2 instruction. - flt_need_heap_1(Is, H, Fl, Acc); - {Z,Stk,_,Inits} when is_integer(Stk) -> - %% Keep any allocate*/2 instruction and recalculate heap need. - I = {set,[],[],{alloc,regs,{Z,Stk,build_alloc(H, Fl),Inits}}}, - flt_need_heap_1(Is, 0, 0, [I|Acc]) - end; -flt_need_heap_1([I|Is], H0, Fl0, Acc) -> - {Ns,H1,Fl1} = flt_need_heap_2(I, H0, Fl0), - flt_need_heap_1(Is, H1, Fl1, [I|Ns]++Acc); -flt_need_heap_1([], H, Fl, Acc) -> - flt_alloc(H, Fl) ++ Acc. - -%% First come all instructions that build. We pass through, while we -%% add to the need for heap words and floats on the heap. -flt_need_heap_2({set,[_],[{fr,_}],fmove}, H, Fl) -> - {[],H,Fl+1}; -flt_need_heap_2({set,_,_,put_list}, H, Fl) -> - {[],H+2,Fl}; -flt_need_heap_2({set,_,_,{put_tuple,_}}, H, Fl) -> - {[],H+1,Fl}; -flt_need_heap_2({set,_,_,put}, H, Fl) -> - {[],H+1,Fl}; -%% The following instructions cause the insertion of an allocation -%% instruction if needed. -flt_need_heap_2({set,_,_,{alloc,_,_}}, H, Fl) -> - {flt_alloc(H, Fl),0,0}; -flt_need_heap_2({set,_,_,{set_tuple_element,_}}, H, Fl) -> - {flt_alloc(H, Fl),0,0}; -flt_need_heap_2({'%anno',_}, H, Fl) -> - {flt_alloc(H, Fl),0,0}; -%% All other instructions are "neutral". We just pass them. -flt_need_heap_2(_, H, Fl) -> - {[],H,Fl}. - -flt_alloc(0, 0) -> - []; -flt_alloc(H, 0) -> - [{set,[],[],{alloc,regs,{nozero,nostack,H,[]}}}]; -flt_alloc(H, F) -> - [{set,[],[],{alloc,regs,{nozero,nostack, - build_alloc(H, F),[]}}}]. - -build_alloc(Words, 0) -> Words; -build_alloc(Words, Floats) -> {alloc,[{words,Words},{floats,Floats}]}. - - -%% flt_liveness([Instruction]) -> [Instruction] -%% (Re)calculate the number of live registers for each heap allocation -%% function. We base liveness of the number of register map at the -%% beginning of the instruction sequence. -%% -%% A 'not_possible' term will be thrown if the set of live registers -%% is not continous at an allocation function (e.g. if {x,0} and {x,2} -%% are live, but not {x,1}). - -flt_liveness([{'%anno',{used,Regs}}=LiveInstr|Is]) -> - flt_liveness_1(Is, Regs, [LiveInstr]). - -flt_liveness_1([{set,Ds,Ss,{alloc,Live0,Alloc}}|Is], Regs0, Acc) -> - Live = min(Live0, live_regs(Regs0)), - I = {set,Ds,Ss,{alloc,Live,Alloc}}, - Regs1 = init_regs(Live), - Regs = x_live(Ds, Regs1), - flt_liveness_1(Is, Regs, [I|Acc]); -flt_liveness_1([{set,Ds,_,_}=I|Is], Regs0, Acc) -> - Regs = x_live(Ds, Regs0), - flt_liveness_1(Is, Regs, [I|Acc]); -flt_liveness_1([{'%anno',_}], _Regs, Acc) -> - reverse(Acc). - -init_regs(Live) -> - (1 bsl Live) - 1. - -live_regs(Regs) -> - live_regs_1(Regs, 0). - -live_regs_1(0, N) -> N; -live_regs_1(R, N) -> - case R band 1 of - 0 -> throw(not_possible); - 1 -> live_regs_1(R bsr 1, N+1) - end. - -x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N)); -x_live([_|Rs], Regs) -> x_live(Rs, Regs); -x_live([], Regs) -> Regs. - -%% update(Instruction, TypeDb) -> NewTypeDb -%% Update the type database to account for executing an instruction. -%% -%% First the cases for instructions inside basic blocks. -update({'%anno',_}, Ts) -> - Ts; -update({set,[D],[S],move}, Ts) -> - tdb_copy(S, D, Ts); -update({set,[D],[Index,Reg],{bif,element,_}}, Ts0) -> - MinSize = case Index of - {integer,I} -> I; - _ -> 0 - end, - Ts = tdb_meet(Reg, {tuple,min_size,MinSize,[]}, Ts0), - tdb_store(D, any, Ts); -update({set,[D],[_Key,Map],{bif,map_get,_}}, Ts0) -> - Ts = tdb_meet(Map, map, Ts0), - tdb_store(D, any, Ts); -update({set,[D],Args,{bif,N,_}}, Ts) -> - Ar = length(Args), - BoolOp = erl_internal:new_type_test(N, Ar) orelse - erl_internal:comp_op(N, Ar) orelse - erl_internal:bool_op(N, Ar), - Type = case BoolOp of - true -> boolean; - false -> unary_op_type(N) - end, - tdb_store(D, Type, Ts); -update({set,[D],[S],{get_tuple_element,0}}, Ts0) -> - if - D =:= S -> - tdb_store(D, any, Ts0); - true -> - Ts = tdb_store(D, {tuple_element,S,0}, Ts0), - tdb_store(S, {tuple,min_size,1,[]}, Ts) - end; -update({set,[D],[S],{alloc,_,{gc_bif,float,{f,0}}}}, Ts0) -> - %% Make sure we reject non-numeric literal argument. - case possibly_numeric(S) of - true -> tdb_store(D, float, Ts0); - false -> Ts0 - end; -update({set,[D],[S1,S2],{alloc,_,{gc_bif,'band',{f,0}}}}, Ts) -> - Type = band_type(S1, S2, Ts), - tdb_store(D, Type, Ts); -update({set,[D],[S1,S2],{alloc,_,{gc_bif,'/',{f,0}}}}, Ts) -> - %% Make sure we reject non-numeric literals. - case possibly_numeric(S1) andalso possibly_numeric(S2) of - true -> tdb_store(D, float, Ts); - false -> Ts - end; -update({set,[D],[S1,S2],{alloc,_,{gc_bif,Op,{f,0}}}}, Ts0) -> - case op_type(Op) of - integer -> - tdb_store(D, integer, Ts0); - {float,_} -> - case {tdb_find(S1, Ts0),tdb_find(S2, Ts0)} of - {float,_} -> tdb_store(D, float, Ts0); - {_,float} -> tdb_store(D, float, Ts0); - {_,_} -> tdb_store(D, any, Ts0) - end; - Type -> - tdb_store(D, Type, Ts0) - end; -update({set,[D],[_],{alloc,_,{gc_bif,Op,{f,0}}}}, Ts) -> - tdb_store(D, unary_op_type(Op), Ts); -update({set,[],_Src,_Op}, Ts) -> - Ts; -update({set,[D],_Src,_Op}, Ts) -> - tdb_store(D, any, Ts); -update({kill,D}, Ts) -> - tdb_store(D, any, Ts); - -%% Instructions outside of blocks. -update({test,test_arity,_Fail,[Src,Arity]}, Ts) -> - tdb_meet(Src, {tuple,exact_size,Arity,[]}, Ts); -update({get_map_elements,_,Src,{list,Elems0}}, Ts0) -> - Ts1 = tdb_meet(Src, map, Ts0), - {_Ss,Ds} = beam_utils:split_even(Elems0), - foldl(fun(Dst, A) -> tdb_store(Dst, any, A) end, Ts1, Ds); -update({test,is_eq_exact,_,[Reg,{atom,_}=Atom]}, Ts0) -> - Ts = case tdb_find_source_tuple(Reg, Ts0) of - {source_tuple,TupleReg} -> - tdb_meet(TupleReg, {tuple,min_size,1,[Atom]}, Ts0); - none -> - Ts0 - end, - tdb_meet(Reg, Atom, Ts); -update({test,is_record,_Fail,[Src,Tag,{integer,Arity}]}, Ts) -> - tdb_meet(Src, {tuple,exact_size,Arity,[Tag]}, Ts); - -%% Binaries and binary matching. - -update({test,bs_get_integer2,_,_,Args,Dst}, Ts) -> - tdb_store(Dst, get_bs_integer_type(Args), Ts); -update({test,bs_get_utf8,_,_,_,Dst}, Ts) -> - tdb_store(Dst, ?UNICODE_INT, Ts); -update({test,bs_get_utf16,_,_,_,Dst}, Ts) -> - tdb_store(Dst, ?UNICODE_INT, Ts); -update({test,bs_get_utf32,_,_,_,Dst}, Ts) -> - tdb_store(Dst, ?UNICODE_INT, Ts); -update({bs_init,_,{bs_init2,_,_},_,_,Dst}, Ts) -> - tdb_store(Dst, {binary,8}, Ts); -update({bs_init,_,_,_,_,Dst}, Ts) -> - tdb_store(Dst, {binary,1}, Ts); -update({bs_put,_,_,_}, Ts) -> - Ts; -update({bs_save2,_,_}, Ts) -> - Ts; -update({bs_restore2,_,_}, Ts) -> - Ts; -update({bs_context_to_binary,Dst}, Ts) -> - tdb_store(Dst, any, Ts); -update({test,bs_start_match2,_,_,[Src,_],Dst}, Ts0) -> - Ts = tdb_meet(Src, {binary,1}, Ts0), - tdb_copy(Src, Dst, Ts); -update({test,bs_get_binary2,_,_,[_,_,Unit,_],Dst}, Ts) -> - true = is_integer(Unit), %Assertion. - tdb_store(Dst, {binary,Unit}, Ts); -update({test,bs_get_float2,_,_,_,Dst}, Ts) -> - tdb_store(Dst, float, Ts); -update({test,bs_test_unit,_,[Src,Unit]}, Ts) -> - tdb_meet(Src, {binary,Unit}, Ts); - -%% Other test instructions -update({test,Test,_Fail,[Src]}, Ts) -> - Type = case Test of - is_binary -> {binary,8}; - is_bitstr -> {binary,1}; - is_boolean -> boolean; - is_float -> float; - is_integer -> integer; - is_map -> map; - is_nonempty_list -> nonempty_list; - _ -> any - end, - tdb_meet(Src, Type, Ts); -update({test,_Test,_Fail,_Other}, Ts) -> - Ts; - -%% Calls - -update({call_ext,Ar,{extfunc,math,Math,Ar}}, Ts) -> - case is_math_bif(Math, Ar) of - true -> tdb_store({x,0}, float, Ts); - false -> tdb_kill_xregs(Ts) - end; -update({call_ext,3,{extfunc,erlang,setelement,3}}, Ts0) -> - Ts = tdb_kill_xregs(Ts0), - case tdb_find({x,1}, Ts0) of - {tuple,SzKind,Sz,_}=T0 -> - T = case tdb_find({x,0}, Ts0) of - {integer,{I,I}} when I > 1 -> - %% First element is not changed. The result - %% will have the same type. - T0; - _ -> - %% Position is 1 or unknown. May change the - %% first element of the tuple. - {tuple,SzKind,Sz,[]} - end, - tdb_store({x,0}, T, Ts); - _ -> - Ts - end; -update({call,_Arity,_Func}, Ts) -> tdb_kill_xregs(Ts); -update({call_ext,_Arity,_Func}, Ts) -> tdb_kill_xregs(Ts); -update({make_fun2,_,_,_,_}, Ts) -> tdb_kill_xregs(Ts); -update({call_fun, _}, Ts) -> tdb_kill_xregs(Ts); -update({apply, _}, Ts) -> tdb_kill_xregs(Ts); - -update({line,_}, Ts) -> Ts; -update({'%',_}, Ts) -> Ts; - -%% The instruction is unknown. Kill all information. -update(_I, _Ts) -> tdb_new(). - -band_type({integer,Int}, Other, Ts) -> - band_type_1(Int, Other, Ts); -band_type(Other, {integer,Int}, Ts) -> - band_type_1(Int, Other, Ts); -band_type(_, _, _) -> integer. - -band_type_1(Int, OtherSrc, Ts) -> - Type = band_type_2(Int, 0), - OtherType = tdb_find(OtherSrc, Ts), - meet(Type, OtherType). - -band_type_2(N, Bits) when Bits < 64 -> - case 1 bsl Bits of - P when P =:= N + 1 -> - {integer,{0,N}}; - P when P > N + 1 -> - integer; - _ -> - band_type_2(N, Bits+1) - end; -band_type_2(_, _) -> - %% Negative or large positive number. Give up. - integer. - -get_bs_integer_type([_,{integer,N},U,{field_flags,Fl}]) - when N*U < 64 -> - NumBits = N*U, - case member(unsigned, Fl) of - true -> - {integer,{0,(1 bsl NumBits)-1}}; - false -> - %% Signed integer. Don't bother. - integer - end; -get_bs_integer_type(_) -> - %% Avoid creating ranges with a huge upper limit. - integer. - -is_math_bif(cos, 1) -> true; -is_math_bif(cosh, 1) -> true; -is_math_bif(sin, 1) -> true; -is_math_bif(sinh, 1) -> true; -is_math_bif(tan, 1) -> true; -is_math_bif(tanh, 1) -> true; -is_math_bif(acos, 1) -> true; -is_math_bif(acosh, 1) -> true; -is_math_bif(asin, 1) -> true; -is_math_bif(asinh, 1) -> true; -is_math_bif(atan, 1) -> true; -is_math_bif(atanh, 1) -> true; -is_math_bif(erf, 1) -> true; -is_math_bif(erfc, 1) -> true; -is_math_bif(exp, 1) -> true; -is_math_bif(log, 1) -> true; -is_math_bif(log2, 1) -> true; -is_math_bif(log10, 1) -> true; -is_math_bif(sqrt, 1) -> true; -is_math_bif(atan2, 2) -> true; -is_math_bif(pow, 2) -> true; -is_math_bif(ceil, 1) -> true; -is_math_bif(floor, 1) -> true; -is_math_bif(fmod, 2) -> true; -is_math_bif(pi, 0) -> true; -is_math_bif(_, _) -> false. - -%% Reject non-numeric literals. -possibly_numeric({x,_}) -> true; -possibly_numeric({y,_}) -> true; -possibly_numeric({integer,_}) -> true; -possibly_numeric({float,_}) -> true; -possibly_numeric(_) -> false. - -max_tuple_size(Reg, Ts) -> - case tdb_find(Reg, Ts) of - {tuple,_,Sz,_} -> Sz; - _Other -> 0 - end. - -float_op('/', A, B, _) -> - case possibly_numeric(A) andalso possibly_numeric(B) of - true -> {yes,fdiv}; - false -> no - end; -float_op(Op, {float,_}, B, _) -> - case possibly_numeric(B) of - true -> arith_op(Op); - false -> no - end; -float_op(Op, A, {float,_}, _) -> - case possibly_numeric(A) of - true -> arith_op(Op); - false -> no - end; -float_op(Op, A, B, Ts) -> - case {tdb_find(A, Ts),tdb_find(B, Ts)} of - {float,_} -> arith_op(Op); - {_,float} -> arith_op(Op); - {_,_} -> no - end. - -find_dest(V, Rs0) -> - case find_reg(V, Rs0) of - {ok,FR} -> - {FR,mark(V, Rs0, dirty)}; - error -> - Rs = put_reg(V, Rs0, dirty), - {ok,FR} = find_reg(V, Rs), - {FR,Rs} - end. - -load_reg({float,_}=F, _, Rs0, Is0) -> - Rs = put_reg(F, Rs0, clean), - {ok,FR} = find_reg(F, Rs), - Is = [{set,[FR],[F],fmove}|Is0], - {Rs,Is}; -load_reg(V, Ts, Rs0, Is0) -> - case find_reg(V, Rs0) of - {ok,_FR} -> {Rs0,Is0}; - error -> - Rs = put_reg(V, Rs0, clean), - {ok,FR} = find_reg(V, Rs), - Op = case tdb_find(V, Ts) of - float -> fmove; - _ -> fconv - end, - Is = [{set,[FR],[V],Op}|Is0], - {Rs,Is} - end. - -arith_op(Op) -> - case op_type(Op) of - {float,Instr} -> {yes,Instr}; - _ -> no - end. - -op_type('+') -> {float,fadd}; -op_type('-') -> {float,fsub}; -op_type('*') -> {float,fmul}; -%% '/' and 'band' are specially handled. -op_type('bor') -> integer; -op_type('bxor') -> integer; -op_type('bsl') -> integer; -op_type('bsr') -> integer; -op_type('div') -> integer; -op_type(_) -> any. - -unary_op_type(bit_size) -> integer; -unary_op_type(byte_size) -> integer; -unary_op_type(length) -> integer; -unary_op_type(map_size) -> integer; -unary_op_type(size) -> integer; -unary_op_type(tuple_size) -> integer; -unary_op_type(_) -> any. - -flush(Rs, [{set,[_],[_,_,_],{bif,is_record,_}}|_]=Is0, Acc0) -> - Acc = flush_all(Rs, Is0, Acc0), - {[],Acc}; -flush(Rs, [{set,[_],[],{put_tuple,_}}|_]=Is0, Acc0) -> - Acc = flush_all(Rs, Is0, Acc0), - {[],Acc}; -flush(Rs0, [{set,Ds,Ss,_Op}|_], Acc0) -> - Save = cerl_sets:from_list(Ss), - Acc = save_regs(Rs0, Save, Acc0), - Rs1 = foldl(fun(S, A) -> mark(S, A, clean) end, Rs0, Ss), - Kill = cerl_sets:from_list(Ds), - Rs = kill_regs(Rs1, Kill), - {Rs,Acc}; -flush(Rs0, Is, Acc0) -> - Acc = flush_all(Rs0, Is, Acc0), - {[],Acc}. - -flush_all([{_,{float,_},_}|Rs], Is, Acc) -> - flush_all(Rs, Is, Acc); -flush_all([{I,V,dirty}|Rs], Is, Acc0) -> - Acc = checkerror(Acc0), - case beam_utils:is_killed_block(V, Is) of - true -> flush_all(Rs, Is, Acc); - false -> flush_all(Rs, Is, [{set,[V],[{fr,I}],fmove}|Acc]) - end; -flush_all([{_,_,clean}|Rs], Is, Acc) -> flush_all(Rs, Is, Acc); -flush_all([free|Rs], Is, Acc) -> flush_all(Rs, Is, Acc); -flush_all([], _, Acc) -> Acc. - -save_regs(Rs, Save, Acc) -> - foldl(fun(R, A) -> save_reg(R, Save, A) end, Acc, Rs). - -save_reg({I,V,dirty}, Save, Acc) -> - case cerl_sets:is_element(V, Save) of - true -> [{set,[V],[{fr,I}],fmove}|checkerror(Acc)]; - false -> Acc - end; -save_reg(_, _, Acc) -> Acc. - -kill_regs(Rs, Kill) -> - [kill_reg(R, Kill) || R <- Rs]. - -kill_reg({_,V,_}=R, Kill) -> - case cerl_sets:is_element(V, Kill) of - true -> free; - false -> R - end; -kill_reg(R, _) -> R. - -mark(V, [{I,V,_}|Rs], Mark) -> [{I,V,Mark}|Rs]; -mark(V, [R|Rs], Mark) -> [R|mark(V, Rs, Mark)]; -mark(_, [], _) -> []. - -fetch_reg(V, [{I,V,_}|_]) -> {fr,I}; -fetch_reg(V, [_|SRs]) -> fetch_reg(V, SRs). - -find_reg(V, [{I,V,_}|_]) -> {ok,{fr,I}}; -find_reg(V, [_|SRs]) -> find_reg(V, SRs); -find_reg(_, []) -> error. - -put_reg(V, Rs, Dirty) -> put_reg_1(V, Rs, Dirty, 0). - -put_reg_1(V, [free|Rs], Dirty, I) -> [{I,V,Dirty}|Rs]; -put_reg_1(V, [R|Rs], Dirty, I) -> [R|put_reg_1(V, Rs, Dirty, I+1)]; -put_reg_1(V, [], Dirty, I) -> [{I,V,Dirty}]. - -checkerror(Is) -> - checkerror_1(Is, Is). - -checkerror_1([{set,[],[],fcheckerror}|_], OrigIs) -> OrigIs; -checkerror_1([{set,_,_,{bif,fadd,_}}|_], OrigIs) -> checkerror_2(OrigIs); -checkerror_1([{set,_,_,{bif,fsub,_}}|_], OrigIs) -> checkerror_2(OrigIs); -checkerror_1([{set,_,_,{bif,fmul,_}}|_], OrigIs) -> checkerror_2(OrigIs); -checkerror_1([{set,_,_,{bif,fdiv,_}}|_], OrigIs) -> checkerror_2(OrigIs); -checkerror_1([{set,_,_,{bif,fnegate,_}}|_], OrigIs) -> checkerror_2(OrigIs); -checkerror_1([_|Is], OrigIs) -> checkerror_1(Is, OrigIs); -checkerror_1([], OrigIs) -> OrigIs. - -checkerror_2(OrigIs) -> [{set,[],[],fcheckerror}|OrigIs]. - - -%%% Routines for maintaining a type database. The type database -%%% associates type information with registers. -%%% -%%% See the comment for verified_type/1 at the end of module for -%%% a description of the possible types. - -%% tdb_new() -> EmptyDataBase -%% Creates a new, empty type database. - -tdb_new() -> []. - -%% tdb_find(Register, Db) -> Type -%% Returns type information or the atom error if there is no type -%% information available for Register. -%% -%% See the comment for verified_type/1 at the end of module for -%% a description of the possible types. - -tdb_find(Reg, Ts) -> - case tdb_find_raw(Reg, Ts) of - {tuple_element,_,_} -> any; - Type -> Type - end. - -%% tdb_find_source_tuple(Register, Ts) -> {source_tuple,Register} | 'none'. -%% Find the tuple whose first element was fetched to the register Register. - -tdb_find_source_tuple(Reg, Ts) -> - case tdb_find_raw(Reg, Ts) of - {tuple_element,Src,0} -> - {source_tuple,Src}; - _ -> - none - end. - -%% tdb_copy(Source, Dest, Db) -> Db' -%% Update the type information for Dest to have the same type -%% as the Source. - -tdb_copy({Tag,_}=S, D, Ts) when Tag =:= x; Tag =:= y -> - case tdb_find_raw(S, Ts) of - any -> orddict:erase(D, Ts); - Type -> orddict:store(D, Type, Ts) - end; -tdb_copy(Literal, D, Ts) -> - Type = case Literal of - {atom,_} -> Literal; - {float,_} -> float; - {integer,Int} -> {integer,{Int,Int}}; - {literal,[_|_]} -> nonempty_list; - {literal,#{}} -> map; - {literal,Tuple} when tuple_size(Tuple) >= 1 -> - Lit = tag_literal(element(1, Tuple)), - {tuple,exact_size,tuple_size(Tuple),[Lit]}; - _ -> any - end, - tdb_store(D, verified_type(Type), Ts). - -%% tdb_store(Register, Type, Ts0) -> Ts. -%% Store a new type for register Register. Return the update type -%% database. Use this function when a new value is assigned to -%% a register. -%% -%% See the comment for verified_type/1 at the end of module for -%% a description of the possible types. - -tdb_store(Reg, any, Ts) -> - erase(Reg, Ts); -tdb_store(Reg, Type, Ts) -> - store(Reg, verified_type(Type), Ts). - -store(Key, New, [{K,_}|_]=Dict) when Key < K -> - [{Key,New}|Dict]; -store(Key, New, [{K,Val}=E|Dict]) when Key > K -> - case Val of - {tuple_element,Key,_} -> store(Key, New, Dict); - _ -> [E|store(Key, New, Dict)] - end; -store(Key, New, [{_K,Old}|Dict]) -> %Key == K - case Old of - {tuple,_,_,_} -> - [{Key,New}|erase_tuple_element(Key, Dict)]; - _ -> - [{Key,New}|Dict] - end; -store(Key, New, []) -> [{Key,New}]. - -erase(Key, [{K,_}=E|Dict]) when Key < K -> - [E|Dict]; -erase(Key, [{K,Val}=E|Dict]) when Key > K -> - case Val of - {tuple_element,Key,_} -> erase(Key, Dict); - _ -> [E|erase(Key, Dict)] - end; -erase(Key, [{_K,Val}|Dict]) -> %Key == K - case Val of - {tuple,_,_,_} -> erase_tuple_element(Key, Dict); - _ -> Dict - end; -erase(_, []) -> []. - -erase_tuple_element(Key, [{_,{tuple_element,Key,_}}|Dict]) -> - erase_tuple_element(Key, Dict); -erase_tuple_element(Key, [E|Dict]) -> - [E|erase_tuple_element(Key, Dict)]; -erase_tuple_element(_Key, []) -> []. - -%% tdb_meet(Register, Type, Ts0) -> Ts. -%% Update information of a register that is used as the source for an -%% instruction. The type Type will be combined using the meet operation -%% with the previous type information for the register, resulting in -%% narrower (more specific) type. -%% -%% For example, if the previous type is {tuple,min_size,2,[]} and the -%% the new type is {tuple,exact_size,5,[]}, the meet of the types will -%% be {tuple,exact_size,5,[]}. -%% -%% See the comment for verified_type/1 at the end of module for -%% a description of the possible types. - -tdb_meet(Reg, NewType, Ts) -> - Update = fun(Type0) -> meet(Type0, NewType) end, - orddict:update(Reg, Update, NewType, Ts). - -%%% -%%% Here follows internal helper functions for accessing and -%%% updating the type database. -%%% - -tdb_find_raw({x,_}=K, Ts) -> tdb_find_raw_1(K, Ts); -tdb_find_raw({y,_}=K, Ts) -> tdb_find_raw_1(K, Ts); -tdb_find_raw(_, _) -> any. - -tdb_find_raw_1(K, Ts) -> - case orddict:find(K, Ts) of - {ok,Val} -> Val; - error -> any - end. - -tag_literal(A) when is_atom(A) -> {atom,A}; -tag_literal(F) when is_float(F) -> {float,F}; -tag_literal(I) when is_integer(I) -> {integer,I}; -tag_literal([]) -> nil; -tag_literal(Lit) -> {literal,Lit}. - -%% tdb_kill_xregs(Db) -> NewDb -%% Kill all information about x registers. Also kill all tuple_element -%% dependencies from y registers to x registers. - -tdb_kill_xregs([{{x,_},_Type}|Db]) -> tdb_kill_xregs(Db); -tdb_kill_xregs([{{y,_},{tuple_element,{x,_},_}}|Db]) -> tdb_kill_xregs(Db); -tdb_kill_xregs([Any|Db]) -> [Any|tdb_kill_xregs(Db)]; -tdb_kill_xregs([]) -> []. - -%% meet(Type1, Type2) -> Type -%% Returns the "meet" of Type1 and Type2. The meet is a narrower -%% type than Type1 and Type2. For example: -%% -%% meet(integer, {integer,{0,3}}) -> {integer,{0,3}} -%% -%% The meet for two different types result in 'none', which is -%% the bottom element for our type lattice: -%% -%% meet(integer, map) -> none - -meet(T, T) -> - T; -meet({integer,_}=T, integer) -> - T; -meet(integer, {integer,_}=T) -> - T; -meet({integer,{Min1,Max1}}, {integer,{Min2,Max2}}) -> - {integer,{max(Min1, Min2),min(Max1, Max2)}}; -meet({tuple,min_size,Sz1,Same}, {tuple,min_size,Sz2,Same}=Max) when Sz1 < Sz2 -> - Max; -meet({tuple,min_size,Sz1,Same}=Max, {tuple,min_size,Sz2,Same}) when Sz1 > Sz2 -> - Max; -meet({tuple,exact_size,_,Same}=Exact, {tuple,_,_,Same}) -> - Exact; -meet({tuple,_,_,Same},{tuple,exact_size,_,Same}=Exact) -> - Exact; -meet({tuple,SzKind1,Sz1,[]}, {tuple,_SzKind2,_Sz2,First}=Tuple2) -> - meet({tuple,SzKind1,Sz1,First}, Tuple2); -meet({tuple,_SzKind1,_Sz1,First}=Tuple1, {tuple,SzKind2,Sz2,_}) -> - meet(Tuple1, {tuple,SzKind2,Sz2,First}); -meet({binary,U1}, {binary,U2}) -> - {binary,max(U1, U2)}; -meet(T1, T2) -> - case is_any(T1) of - true -> - verified_type(T2); - false -> - case is_any(T2) of - true -> - verified_type(T1); - false -> - none %The bottom element. - end - end. - -is_any(any) -> true; -is_any({tuple_element,_,_}) -> true; -is_any(_) -> false. - -%% verified_type(Type) -> Type -%% Returns the passed in type if it is one of the defined types. -%% Crashes if there is anything wrong with the type. -%% -%% Here are all possible types: -%% -%% any Any Erlang term (top element for the type lattice). -%% -%% {atom,Atom} The specific atom Atom. -%% {binary,Unit} Binary/bitstring aligned to unit Unit. -%% boolean 'true' | 'false' -%% float Floating point number. -%% integer Integer. -%% {integer,{Min,Max}} Integer in the inclusive range Min through Max. -%% map Map. -%% nonempty_list Nonempty list. -%% {tuple,_,_,_} Tuple (see below). -%% -%% none No type (bottom element for the type lattice). -%% -%% {tuple,min_size,Size,First} means that the corresponding register -%% contains a tuple with *at least* Size elements (conversely, -%% {tuple,exact_size,Size,First} means that it contains a tuple with -%% *exactly* Size elements). An tuple with unknown size is -%% represented as {tuple,min_size,0,[]}. First is either [] (meaning -%% that the tuple's first element is unknown) or [FirstElement] (the -%% contents of the first element). -%% -%% There is also a pseudo-type called {tuple_element,_,_}: -%% -%% {tuple_element,SrcTuple,ElementNumber} -%% -%% that does not provide any information about the type of the -%% register itself, but provides a link back to the source tuple that -%% the register got its value from. -%% -%% Note that {tuple_element,_,_} will *never* be returned by tdb_find/2. -%% Use tdb_find_source_tuple/2 to locate the source tuple for a register. - -verified_type(any=T) -> T; -verified_type({atom,_}=T) -> T; -verified_type({binary,U}=T) when is_integer(U) -> T; -verified_type(boolean=T) -> T; -verified_type(integer=T) -> T; -verified_type({integer,{Min,Max}}=T) - when is_integer(Min), is_integer(Max) -> T; -verified_type(map=T) -> T; -verified_type(nonempty_list=T) -> T; -verified_type({tuple,_,Sz,[]}=T) when is_integer(Sz) -> T; -verified_type({tuple,_,Sz,[_]}=T) when is_integer(Sz) -> T; -verified_type({tuple_element,_,_}=T) -> T; -verified_type(float=T) -> T. diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl deleted file mode 100644 index e9152ba88f..0000000000 --- a/lib/compiler/src/v3_codegen.erl +++ /dev/null @@ -1,2925 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 1999-2018. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% -%% Purpose : Code generator for Beam. - --module(v3_codegen). - -%% The main interface. --export([module/2]). - --import(lists, [member/2,keymember/3,keysort/2,keydelete/3, - append/1,flatmap/2,filter/2,foldl/3,foldr/3,mapfoldl/3, - sort/1,reverse/1,reverse/2,map/2]). --import(ordsets, [add_element/2,intersection/2,union/2]). - --include("v3_kernel.hrl"). - -%% These are not defined in v3_kernel.hrl. -get_kanno(Kthing) -> element(2, Kthing). -set_kanno(Kthing, Anno) -> setelement(2, Kthing, Anno). - -%% Main codegen structure. --record(cg, {lcount=1, %Label counter - bfail, %Fail label for BIFs - break, %Break label - recv, %Receive label - is_top_block, %Boolean: top block or not - functable=#{}, %Map of local functions: {Name,Arity}=>Label - in_catch=false, %Inside a catch or not. - need_frame, %Need a stack frame. - ultimate_failure, %Label for ultimate match failure. - ctx %Match context. - }). - -%% Stack/register state record. --record(sr, {reg=[], %Register table - stk=[], %Stack table - res=[]}). %Registers to reserve - -%% Internal records. --record(cg_need_heap, {anno=[] :: term(), - h=0 :: integer()}). --record(cg_block, {anno=[] :: term(), - es=[] :: [term()]}). - --type vdb_entry() :: {atom(),non_neg_integer(),non_neg_integer()}. - --record(l, {i=0 :: non_neg_integer(), %Op number - vdb=[] :: [vdb_entry()], %Variable database - a=[] :: [term()]}). %Core annotation - --spec module(#k_mdef{}, [compile:option()]) -> {'ok',beam_asm:module_code()}. - -module(#k_mdef{name=Mod,exports=Es,attributes=Attr,body=Forms}, _Opts) -> - {Asm,St} = functions(Forms, {atom,Mod}), - {ok,{Mod,Es,Attr,Asm,St#cg.lcount}}. - -functions(Forms, AtomMod) -> - mapfoldl(fun (F, St) -> function(F, AtomMod, St) end, #cg{lcount=1}, Forms). - -function(#k_fdef{anno=#k{a=Anno},func=Name,arity=Arity, - vars=As,body=Kb}, AtomMod, St0) -> - try - #k_match{} = Kb, %Assertion. - - %% Try to suppress the stack frame unless it is - %% really needed. - Body0 = avoid_stack_frame(Kb), - - %% Annotate kernel records with variable usage. - Vdb0 = init_vars(As), - {Body,_,Vdb} = body(Body0, 1, Vdb0), - - %% Generate the BEAM assembly code. - {Asm,EntryLabel,St} = cg_fun(Body, As, Vdb, AtomMod, - {Name,Arity}, Anno, St0), - Func = {function,Name,Arity,EntryLabel,Asm}, - {Func,St} - catch - Class:Error:Stack -> - io:fwrite("Function: ~w/~w\n", [Name,Arity]), - erlang:raise(Class, Error, Stack) - end. - - -%% avoid_stack_frame(Kernel) -> Kernel' -%% If possible, avoid setting up a stack frame. Functions -%% that only do matching, calls to guard BIFs, and tail-recursive -%% calls don't need a stack frame. - -avoid_stack_frame(#k_match{body=Body}=M) -> - try - M#k_match{body=avoid_stack_frame_1(Body)} - catch - impossible -> - M - end. - -avoid_stack_frame_1(#k_alt{first=First0,then=Then0}=Alt) -> - First = avoid_stack_frame_1(First0), - Then = avoid_stack_frame_1(Then0), - Alt#k_alt{first=First,then=Then}; -avoid_stack_frame_1(#k_bif{op=Op}=Bif) -> - case Op of - #k_internal{} -> - %% Most internal BIFs clobber the X registers. - throw(impossible); - _ -> - Bif - end; -avoid_stack_frame_1(#k_break{anno=Anno,args=Args}) -> - #k_guard_break{anno=Anno,args=Args}; -avoid_stack_frame_1(#k_guard_break{}=Break) -> - Break; -avoid_stack_frame_1(#k_enter{}=Enter) -> - %% Tail-recursive calls don't need a stack frame. - Enter; -avoid_stack_frame_1(#k_guard{clauses=Cs0}=Guard) -> - Cs = avoid_stack_frame_list(Cs0), - Guard#k_guard{clauses=Cs}; -avoid_stack_frame_1(#k_guard_clause{guard=G0,body=B0}=C) -> - G = avoid_stack_frame_1(G0), - B = avoid_stack_frame_1(B0), - C#k_guard_clause{guard=G,body=B}; -avoid_stack_frame_1(#k_match{anno=A,vars=Vs,body=B0,ret=Ret}) -> - %% Use #k_guard_match{} instead to avoid saving the X registers - %% to the stack before matching. - B = avoid_stack_frame_1(B0), - #k_guard_match{anno=A,vars=Vs,body=B,ret=Ret}; -avoid_stack_frame_1(#k_guard_match{body=B0}=M) -> - B = avoid_stack_frame_1(B0), - M#k_guard_match{body=B}; -avoid_stack_frame_1(#k_protected{arg=Arg0}=Prot) -> - Arg = avoid_stack_frame_1(Arg0), - Prot#k_protected{arg=Arg}; -avoid_stack_frame_1(#k_put{}=Put) -> - Put; -avoid_stack_frame_1(#k_return{}=Ret) -> - Ret; -avoid_stack_frame_1(#k_select{var=#k_var{anno=Vanno},types=Types0}=Select) -> - case member(reuse_for_context, Vanno) of - false -> - Types = avoid_stack_frame_list(Types0), - Select#k_select{types=Types}; - true -> - %% Including binary patterns that overwrite the register containing - %% the binary with the match context may not be safe. For example, - %% bs_match_SUITE:bin_tail_e/1 with inlining will be rejected by - %% beam_validator. - %% - %% Essentially the following code is produced: - %% - %% bs_match {x,0} => {x,0} - %% ... - %% bs_match {x,0} => {x,1} %% ILLEGAL - %% - %% A bs_match instruction will only accept a match context as the - %% source operand if the source and destination registers are the - %% the same (as in the first bs_match instruction above). - %% The second bs_match instruction is therefore illegal. - %% - %% This situation is avoided if there is a stack frame: - %% - %% move {x,0} => {y,0} - %% bs_match {x,0} => {x,0} - %% ... - %% bs_match {y,0} => {x,1} %% LEGAL - %% - throw(impossible) - end; -avoid_stack_frame_1(#k_seq{arg=#k_call{anno=Anno,op=Op}=Call, - body=#k_break{args=BrArgs0}}=Seq) -> - case Op of - #k_remote{mod=#k_atom{val=Mod}, - name=#k_atom{val=Name}, - arity=Arity} -> - case erl_bifs:is_exit_bif(Mod, Name, Arity) of - false -> - %% Will clobber X registers. Must have a stack frame. - throw(impossible); - true -> - %% The call to this BIF will never return. It is safe - %% to suppress the stack frame. - Bif = #k_bif{anno=Anno, - op=#k_internal{name=guard_error,arity=1}, - args=[Call],ret=[]}, - BrArgs = lists:duplicate(length(BrArgs0), #k_nil{}), - GB = #k_guard_break{anno=#k{us=[],ns=[],a=[]},args=BrArgs}, - Seq#k_seq{arg=Bif,body=GB} - end; - _ -> - %% Will clobber X registers. Must have a stack frame. - throw(impossible) - end; -avoid_stack_frame_1(#k_seq{arg=A0,body=B0}=Seq) -> - A = avoid_stack_frame_1(A0), - B = avoid_stack_frame_1(B0), - Seq#k_seq{arg=A,body=B}; -avoid_stack_frame_1(#k_test{}=Test) -> - Test; -avoid_stack_frame_1(#k_type_clause{values=Values0}=TC) -> - Values = avoid_stack_frame_list(Values0), - TC#k_type_clause{values=Values}; -avoid_stack_frame_1(#k_val_clause{body=B0}=VC) -> - B = avoid_stack_frame_1(B0), - VC#k_val_clause{body=B}; -avoid_stack_frame_1(_Body) -> - throw(impossible). - -avoid_stack_frame_list([H|T]) -> - [avoid_stack_frame_1(H)|avoid_stack_frame_list(T)]; -avoid_stack_frame_list([]) -> []. - - -%% This pass creates beam format annotated with variable lifetime -%% information. Each thing is given an index and for each variable we -%% store the first and last index for its occurrence. The variable -%% database, VDB, attached to each thing is only relevant internally -%% for that thing. -%% -%% For nested things like matches the numbering continues locally and -%% the VDB for that thing refers to the variable usage within that -%% thing. Variables which live through a such a thing are internally -%% given a very large last index. Internally the indexes continue -%% after the index of that thing. This creates no problems as the -%% internal variable info never escapes and externally we only see -%% variable which are alive both before or after. -%% -%% This means that variables never "escape" from a thing and the only -%% way to get values from a thing is to "return" them, with 'break' or -%% 'return'. Externally these values become the return values of the -%% thing. This is no real limitation as most nested things have -%% multiple threads so working out a common best variable usage is -%% difficult. - -%% body(Kbody, I, Vdb) -> {[Expr],MaxI,Vdb}. -%% Handle a body. - -body(#k_seq{arg=Ke,body=Kb}, I, Vdb0) -> - %%ok = io:fwrite("life ~w:~p~n", [?LINE,{Ke,I,Vdb0}]), - A = get_kanno(Ke), - Vdb1 = use_vars(union(A#k.us, A#k.ns), I, Vdb0), - {Es,MaxI,Vdb2} = body(Kb, I+1, Vdb1), - E = expr(Ke, I, Vdb2), - {[E|Es],MaxI,Vdb2}; -body(Ke, I, Vdb0) -> - %%ok = io:fwrite("life ~w:~p~n", [?LINE,{Ke,I,Vdb0}]), - A = get_kanno(Ke), - Vdb1 = use_vars(union(A#k.us, A#k.ns), I, Vdb0), - E = expr(Ke, I, Vdb1), - {[E],I,Vdb1}. - -%% expr(Kexpr, I, Vdb) -> Expr. - -expr(#k_test{anno=A}=Test, I, _Vdb) -> - Test#k_test{anno=#l{i=I,a=A#k.a}}; -expr(#k_call{anno=A}=Call, I, _Vdb) -> - Call#k_call{anno=#l{i=I,a=A#k.a}}; -expr(#k_enter{anno=A}=Enter, I, _Vdb) -> - Enter#k_enter{anno=#l{i=I,a=A#k.a}}; -expr(#k_bif{anno=A}=Bif, I, _Vdb) -> - Bif#k_bif{anno=#l{i=I,a=A#k.a}}; -expr(#k_match{anno=A,body=Kb,ret=Rs}, I, Vdb) -> - %% Work out imported variables which need to be locked. - Mdb = vdb_sub(I, I+1, Vdb), - M = match(Kb, A#k.us, I+1, Mdb), - L = #l{i=I,vdb=use_vars(A#k.us, I+1, Mdb),a=A#k.a}, - #k_match{anno=L,body=M,ret=Rs}; -expr(#k_guard_match{anno=A,body=Kb,ret=Rs}, I, Vdb) -> - %% Work out imported variables which need to be locked. - Mdb = vdb_sub(I, I+1, Vdb), - M = match(Kb, A#k.us, I+1, Mdb), - L = #l{i=I,vdb=use_vars(A#k.us, I+1, Mdb),a=A#k.a}, - #k_guard_match{anno=L,body=M,ret=Rs}; -expr(#k_protected{}=Protected, I, Vdb) -> - protected(Protected, I, Vdb); -expr(#k_try{anno=A,arg=Ka,vars=Vs,body=Kb,evars=Evs,handler=Kh}=Try, I, Vdb) -> - %% Lock variables that are alive before the catch and used afterwards. - %% Don't lock variables that are only used inside the try. - Tdb0 = vdb_sub(I, I+1, Vdb), - %% This is the tricky bit. Lock variables in Arg that are used in - %% the body and handler. Add try tag 'variable'. - Ab = get_kanno(Kb), - Ah = get_kanno(Kh), - Tdb1 = use_vars(union(Ab#k.us, Ah#k.us), I+3, Tdb0), - Tdb2 = vdb_sub(I, I+2, Tdb1), - Vnames = fun (Kvar) -> Kvar#k_var.name end, %Get the variable names - {Aes,_,Adb} = body(Ka, I+2, add_var({catch_tag,I+1}, I+1, locked, Tdb2)), - {Bes,_,Bdb} = body(Kb, I+4, new_vars(sort(map(Vnames, Vs)), I+3, Tdb2)), - {Hes,_,Hdb} = body(Kh, I+4, new_vars(sort(map(Vnames, Evs)), I+3, Tdb2)), - L = #l{i=I,vdb=Tdb1,a=A#k.a}, - Try#k_try{anno=L, - arg=#cg_block{es=Aes,anno=#l{i=I+1,vdb=Adb,a=[]}}, - vars=Vs,body=#cg_block{es=Bes,anno=#l{i=I+3,vdb=Bdb,a=[]}}, - evars=Evs,handler=#cg_block{es=Hes,anno=#l{i=I+3,vdb=Hdb,a=[]}}}; -expr(#k_try_enter{anno=A,arg=Ka,vars=Vs,body=Kb,evars=Evs,handler=Kh}, I, Vdb) -> - %% Lock variables that are alive before the catch and used afterwards. - %% Don't lock variables that are only used inside the try. - Tdb0 = vdb_sub(I, I+1, Vdb), - %% This is the tricky bit. Lock variables in Arg that are used in - %% the body and handler. Add try tag 'variable'. - Ab = get_kanno(Kb), - Ah = get_kanno(Kh), - Tdb1 = use_vars(union(Ab#k.us, Ah#k.us), I+3, Tdb0), - Tdb2 = vdb_sub(I, I+2, Tdb1), - Vnames = fun (Kvar) -> Kvar#k_var.name end, %Get the variable names - {Aes,_,Adb} = body(Ka, I+2, add_var({catch_tag,I+1}, I+1, 1000000, Tdb2)), - {Bes,_,Bdb} = body(Kb, I+4, new_vars(sort(map(Vnames, Vs)), I+3, Tdb2)), - {Hes,_,Hdb} = body(Kh, I+4, new_vars(sort(map(Vnames, Evs)), I+3, Tdb2)), - L = #l{i=I,vdb=Tdb1,a=A#k.a}, - #k_try_enter{anno=L, - arg=#cg_block{es=Aes,anno=#l{i=I+1,vdb=Adb,a=[]}}, - vars=Vs,body=#cg_block{es=Bes,anno=#l{i=I+3,vdb=Bdb,a=[]}}, - evars=Evs,handler=#cg_block{es=Hes,anno=#l{i=I+3,vdb=Hdb,a=[]}}}; -expr(#k_catch{anno=A,body=Kb}=Catch, I, Vdb) -> - %% Lock variables that are alive before the catch and used afterwards. - %% Don't lock variables that are only used inside the catch. - %% Add catch tag 'variable'. - Cdb0 = vdb_sub(I, I+1, Vdb), - {Es,_,Cdb1} = body(Kb, I+1, add_var({catch_tag,I}, I, locked, Cdb0)), - L = #l{i=I,vdb=Cdb1,a=A#k.a}, - Catch#k_catch{anno=L,body=#cg_block{es=Es}}; -expr(#k_receive{anno=A,var=V,body=Kb,action=Ka}=Recv, I, Vdb) -> - %% Work out imported variables which need to be locked. - Rdb = vdb_sub(I, I+1, Vdb), - M = match(Kb, add_element(V#k_var.name, A#k.us), I+1, - new_vars([V#k_var.name], I, Rdb)), - {Tes,_,Adb} = body(Ka, I+1, Rdb), - Le = #l{i=I,vdb=use_vars(A#k.us, I+1, Vdb),a=A#k.a}, - Recv#k_receive{anno=Le,body=M, - action=#cg_block{anno=#l{i=I+1,vdb=Adb,a=[]},es=Tes}}; -expr(#k_receive_accept{anno=A}, I, _Vdb) -> - #k_receive_accept{anno=#l{i=I,a=A#k.a}}; -expr(#k_receive_next{anno=A}, I, _Vdb) -> - #k_receive_next{anno=#l{i=I,a=A#k.a}}; -expr(#k_put{anno=A}=Put, I, _Vdb) -> - Put#k_put{anno=#l{i=I,a=A#k.a}}; -expr(#k_break{anno=A}=Break, I, _Vdb) -> - Break#k_break{anno=#l{i=I,a=A#k.a}}; -expr(#k_guard_break{anno=A}=Break, I, _Vdb) -> - Break#k_guard_break{anno=#l{i=I,a=A#k.a}}; -expr(#k_return{anno=A}=Ret, I, _Vdb) -> - Ret#k_return{anno=#l{i=I,a=A#k.a}}. - -%% protected(Kprotected, I, Vdb) -> Protected. -%% Only used in guards. - -protected(#k_protected{anno=A,arg=Ts}=Prot, I, Vdb) -> - %% Lock variables that are alive before try and used afterwards. - %% Don't lock variables that are only used inside the protected - %% expression. - Pdb0 = vdb_sub(I, I+1, Vdb), - {T,MaxI,Pdb1} = body(Ts, I+1, Pdb0), - Pdb2 = use_vars(A#k.ns, MaxI+1, Pdb1), %Save "return" values - Prot#k_protected{arg=T,anno=#l{i=I,a=A#k.a,vdb=Pdb2}}. - -%% match(Kexpr, [LockVar], I, Vdb) -> Expr. -%% Convert match tree to old format. - -match(#k_alt{anno=A,first=Kf,then=Kt}, Ls, I, Vdb0) -> - Vdb1 = use_vars(union(A#k.us, Ls), I, Vdb0), - F = match(Kf, Ls, I+1, Vdb1), - T = match(Kt, Ls, I+1, Vdb1), - #k_alt{anno=[],first=F,then=T}; -match(#k_select{anno=A,types=Kts}=Select, Ls, I, Vdb0) -> - Vdb1 = use_vars(union(A#k.us, Ls), I, Vdb0), - Ts = [type_clause(Tc, Ls, I+1, Vdb1) || Tc <- Kts], - Select#k_select{anno=[],types=Ts}; -match(#k_guard{anno=A,clauses=Kcs}, Ls, I, Vdb0) -> - Vdb1 = use_vars(union(A#k.us, Ls), I, Vdb0), - Cs = [guard_clause(G, Ls, I+1, Vdb1) || G <- Kcs], - #k_guard{anno=[],clauses=Cs}; -match(Other, Ls, I, Vdb0) -> - Vdb1 = use_vars(Ls, I, Vdb0), - {B,_,Vdb2} = body(Other, I+1, Vdb1), - Le = #l{i=I,vdb=Vdb2,a=[]}, - #cg_block{anno=Le,es=B}. - -type_clause(#k_type_clause{anno=A,type=T,values=Kvs}, Ls, I, Vdb0) -> - %%ok = io:format("life ~w: ~p~n", [?LINE,{T,Kvs}]), - Vdb1 = use_vars(union(A#k.us, Ls), I+1, Vdb0), - Vs = [val_clause(Vc, Ls, I+1, Vdb1) || Vc <- Kvs], - #k_type_clause{anno=[],type=T,values=Vs}. - -val_clause(#k_val_clause{anno=A,val=V,body=Kb}, Ls0, I, Vdb0) -> - New = (get_kanno(V))#k.ns, - Bus = (get_kanno(Kb))#k.us, - %%ok = io:format("Ls0 = ~p, Used=~p\n New=~p, Bus=~p\n", [Ls0,Used,New,Bus]), - Ls1 = union(intersection(New, Bus), Ls0), %Lock for safety - Vdb1 = use_vars(union(A#k.us, Ls1), I+1, new_vars(New, I, Vdb0)), - B = match(Kb, Ls1, I+1, Vdb1), - Le = #l{i=I,vdb=use_vars(Bus, I+1, Vdb1),a=A#k.a}, - #k_val_clause{anno=Le,val=V,body=B}. - -guard_clause(#k_guard_clause{anno=A,guard=Kg,body=Kb}, Ls, I, Vdb0) -> - Vdb1 = use_vars(union(A#k.us, Ls), I+2, Vdb0), - Gdb = vdb_sub(I+1, I+2, Vdb1), - G = protected(Kg, I+1, Gdb), - B = match(Kb, Ls, I+2, Vdb1), - Le = #l{i=I,vdb=use_vars((get_kanno(Kg))#k.us, I+2, Vdb1),a=A#k.a}, - #k_guard_clause{anno=Le,guard=G,body=B}. - - -%% Here follows the code generator pass. -%% -%% The following assumptions have been made: -%% -%% 1. Matches, i.e. things with {match,M,Ret} wrappers, only return -%% values; no variables are exported. If the match would have returned -%% extra variables then these have been transformed to multiple return -%% values. -%% -%% 2. All BIF's called in guards are gc-safe so there is no need to -%% put thing on the stack in the guard. While this would in principle -%% work it would be difficult to keep track of the stack depth when -%% trimming. -%% -%% The code generation uses variable lifetime information added by -%% the previous pass to save variables, allocate registers and -%% move registers to the stack when necessary. -%% -%% We try to use a consistent variable name scheme throughout. The -%% StackReg record is always called Bef,Int,Aft. - -%% cg_fun([Lkexpr], [HeadVar], Vdb, State) -> {[Ainstr],State} - -cg_fun(Les, Hvs, Vdb, AtomMod, NameArity, Anno, St0) -> - {Fi,St1} = new_label(St0), %FuncInfo label - {Fl,St2} = local_func_label(NameArity, St1), - - %% - %% The pattern matching compiler (in v3_kernel) no longer - %% provides its own catch-all clause, because the - %% call to erlang:exit/1 caused problem when cases were - %% used in guards. Therefore, there may be tests that - %% cannot fail (providing that there is not a bug in a - %% previous optimzation pass), but still need to provide - %% a label (there are instructions, such as is_tuple/2, - %% that do not allow {f,0}). - %% - %% We will generate an ultimate failure label and put it - %% at the end of function, followed by an 'if_end' instruction. - %% Note that and 'if_end' instruction does not need any - %% live x registers, so it will always be safe to jump to - %% it. (We never ever expect the jump to be taken, and in - %% most functions there will never be any references to - %% the label in the first place.) - %% - - {UltimateMatchFail,St3} = new_label(St2), - - %% Create initial stack/register state, clear unused arguments. - Bef = clear_dead(#sr{reg=foldl(fun (#k_var{name=V}, Reg) -> - put_reg(V, Reg) - end, [], Hvs), - stk=[]}, 0, Vdb), - {B,_Aft,St} = cg_list(Les, Vdb, Bef, - St3#cg{bfail=0, - ultimate_failure=UltimateMatchFail, - is_top_block=true}), - {Name,Arity} = NameArity, - Asm = [{label,Fi},line(Anno),{func_info,AtomMod,{atom,Name},Arity}, - {label,Fl}|B++[{label,UltimateMatchFail},if_end]], - {Asm,Fl,St}. - -%% cg(Lkexpr, Vdb, StackReg, State) -> {[Ainstr],StackReg,State}. -%% Generate code for a kexpr. - -cg(#cg_block{anno=Le,es=Es}, Vdb, Bef, St) -> - block_cg(Es, Le, Vdb, Bef, St); -cg(#k_match{anno=Le,body=M,ret=Rs}, Vdb, Bef, St) -> - match_cg(M, Rs, Le, Vdb, Bef, St); -cg(#k_guard_match{anno=Le,body=M,ret=Rs}, Vdb, Bef, St) -> - guard_match_cg(M, Rs, Le, Vdb, Bef, St); -cg(#k_call{anno=Le,op=Func,args=As,ret=Rs}, Vdb, Bef, St) -> - call_cg(Func, As, Rs, Le, Vdb, Bef, St); -cg(#k_enter{anno=Le,op=Func,args=As}, Vdb, Bef, St) -> - enter_cg(Func, As, Le, Vdb, Bef, St); -cg(#k_bif{anno=Le}=Bif, Vdb, Bef, St) -> - bif_cg(Bif, Le, Vdb, Bef, St); -cg(#k_receive{anno=Le,timeout=Te,var=Rvar,body=Rm,action=Tes,ret=Rs}, - Vdb, Bef, St) -> - recv_loop_cg(Te, Rvar, Rm, Tes, Rs, Le, Vdb, Bef, St); -cg(#k_receive_next{anno=Le}, Vdb, Bef, St) -> - recv_next_cg(Le, Vdb, Bef, St); -cg(#k_receive_accept{}, _Vdb, Bef, St) -> - {[remove_message],Bef,St}; -cg(#k_try{anno=Le,arg=Ta,vars=Vs,body=Tb,evars=Evs,handler=Th,ret=Rs}, - Vdb, Bef, St) -> - try_cg(Ta, Vs, Tb, Evs, Th, Rs, Le, Vdb, Bef, St); -cg(#k_try_enter{anno=Le,arg=Ta,vars=Vs,body=Tb,evars=Evs,handler=Th}, - Vdb, Bef, St) -> - try_enter_cg(Ta, Vs, Tb, Evs, Th, Le, Vdb, Bef, St); -cg(#k_catch{anno=Le,body=Cb,ret=[R]}, Vdb, Bef, St) -> - catch_cg(Cb, R, Le, Vdb, Bef, St); -cg(#k_put{anno=Le,arg=Con,ret=Var}, Vdb, Bef, St) -> - put_cg(Var, Con, Le, Vdb, Bef, St); -cg(#k_return{anno=Le,args=Rs}, Vdb, Bef, St) -> - return_cg(Rs, Le, Vdb, Bef, St); -cg(#k_break{anno=Le,args=Bs}, Vdb, Bef, St) -> - break_cg(Bs, Le, Vdb, Bef, St); -cg(#k_guard_break{anno=Le,args=Bs}, Vdb, Bef, St) -> - guard_break_cg(Bs, Le, Vdb, Bef, St); -cg(#cg_need_heap{h=H}, _Vdb, Bef, St) -> - {[{test_heap,H,max_reg(Bef#sr.reg)}],Bef,St}. - -%% cg_list([Kexpr], FirstI, Vdb, StackReg, St) -> {[Ainstr],StackReg,St}. - -cg_list(Kes, Vdb, Bef, St0) -> - {Keis,{Aft,St1}} = - flatmapfoldl(fun (Ke, {Inta,Sta}) -> - {Keis,Intb,Stb} = cg(Ke, Vdb, Inta, Sta), - {Keis,{Intb,Stb}} - end, {Bef,St0}, need_heap(Kes)), - {Keis,Aft,St1}. - -%% need_heap([Lkexpr], I, St) -> [Lkexpr]. -%% Insert need_heap instructions in Kexpr list. Try to be smart and -%% collect them together as much as possible. - -need_heap(Kes0) -> - {Kes,H} = need_heap_0(reverse(Kes0), 0, []), - - %% Prepend need_heap if necessary. - need_heap_need(H) ++ Kes. - -need_heap_0([Ke|Kes], H0, Acc) -> - {Ns,H} = need_heap_1(Ke, H0), - need_heap_0(Kes, H, [Ke|Ns]++Acc); -need_heap_0([], H, Acc) -> - {Acc,H}. - -need_heap_1(#k_put{arg=#k_binary{}}, H) -> - {need_heap_need(H),0}; -need_heap_1(#k_put{arg=#k_map{}}, H) -> - {need_heap_need(H),0}; -need_heap_1(#k_put{arg=Val}, H) -> - %% Just pass through adding to needed heap. - {[],H + case Val of - #k_cons{} -> 2; - #k_tuple{es=Es} -> 1 + length(Es); - _Other -> 0 - end}; -need_heap_1(#k_bif{}=Bif, H) -> - case is_gc_bif(Bif) of - false -> - {[],H}; - true -> - {need_heap_need(H),0} - end; -need_heap_1(_Ke, H) -> - %% Call or call-like instruction such as set_tuple_element/3. - {need_heap_need(H),0}. - -need_heap_need(0) -> []; -need_heap_need(H) -> [#cg_need_heap{h=H}]. - -%% is_gc_bif(#k_bif{}) -> true|false. -%% is_gc_bif(Name, Arity) -> true|false. -%% Determines whether the BIF Name/Arity might do a GC. - -is_gc_bif(#k_bif{op=#k_remote{name=#k_atom{val=Name}},args=Args}) -> - is_gc_bif(Name, length(Args)); -is_gc_bif(#k_bif{op=#k_internal{}}) -> - true. - -is_gc_bif(hd, 1) -> false; -is_gc_bif(tl, 1) -> false; -is_gc_bif(self, 0) -> false; -is_gc_bif(node, 0) -> false; -is_gc_bif(node, 1) -> false; -is_gc_bif(element, 2) -> false; -is_gc_bif(get, 1) -> false; -is_gc_bif(tuple_size, 1) -> false; -is_gc_bif(map_get, 2) -> false; -is_gc_bif(is_map_key, 2) -> false; -is_gc_bif(Bif, Arity) -> - not (erl_internal:bool_op(Bif, Arity) orelse - erl_internal:new_type_test(Bif, Arity) orelse - erl_internal:comp_op(Bif, Arity)). - -%% match_cg(Matc, [Ret], Le, Vdb, StackReg, State) -> -%% {[Ainstr],StackReg,State}. -%% Generate code for a match. First save all variables on the stack -%% that are to survive after the match. We leave saved variables in -%% their registers as they might actually be in the right place. - -match_cg(M, Rs, Le, Vdb, Bef, St0) -> - I = Le#l.i, - {Sis,Int0} = adjust_stack(Bef, I, I+1, Vdb), - {B,St1} = new_label(St0), - {Mis,Int1,St2} = match_cg(M, St1#cg.ultimate_failure, - Int0, St1#cg{break=B}), - %% Put return values in registers. - Reg = load_vars(Rs, Int1#sr.reg), - {Sis ++ Mis ++ [{label,B}], - clear_dead(Int1#sr{reg=Reg}, I, Vdb), - St2#cg{break=St1#cg.break}}. - -guard_match_cg(M, Rs, Le, Vdb, Bef, St0) -> - I = Le#l.i, - {B,St1} = new_label(St0), - Fail = case St0 of - #cg{bfail=0,ultimate_failure=Fail0} -> Fail0; - #cg{bfail=Fail0} -> Fail0 - end, - {Mis,Aft,St2} = match_cg(M, Fail, Bef, St1#cg{break=B}), - %% Update the register descriptors for the return registers. - Reg = guard_match_regs(Aft#sr.reg, Rs), - {Mis ++ [{label,B}], - clear_dead(Aft#sr{reg=Reg}, I, Vdb), - St2#cg{break=St1#cg.break}}. - -guard_match_regs([{I,gbreakvar}|Rs], [#k_var{name=V}|Vs]) -> - [{I,V}|guard_match_regs(Rs, Vs)]; -guard_match_regs([R|Rs], Vs) -> - [R|guard_match_regs(Rs, Vs)]; -guard_match_regs([], []) -> []. - - -%% match_cg(Match, Fail, StackReg, State) -> {[Ainstr],StackReg,State}. -%% Generate code for a match tree. N.B. there is no need pass Vdb -%% down as each level which uses this takes its own internal Vdb not -%% the outer one. - -match_cg(#k_alt{first=F,then=S}, Fail, Bef, St0) -> - {Tf,St1} = new_label(St0), - {Fis,Faft,St2} = match_cg(F, Tf, Bef, St1), - {Sis,Saft,St3} = match_cg(S, Fail, Bef, St2), - Aft = sr_merge(Faft, Saft), - {Fis ++ [{label,Tf}] ++ Sis,Aft,St3}; -match_cg(#k_select{var=#k_var{anno=Vanno,name=Vname}=V,types=Scs0}, Fail, Bef, St) -> - ReuseForContext = member(reuse_for_context, Vanno) andalso - find_reg(Vname, Bef#sr.reg) =/= error, - Scs = case ReuseForContext of - false -> Scs0; - true -> bsm_rename_ctx(Scs0, Vname) - end, - match_fmf(fun (S, F, Sta) -> - select_cg(S, V, F, Fail, Bef, Sta) end, - Fail, St, Scs); -match_cg(#k_guard{clauses=Gcs}, Fail, Bef, St) -> - match_fmf(fun (G, F, Sta) -> guard_clause_cg(G, F, Bef, Sta) end, - Fail, St, Gcs); -match_cg(#cg_block{anno=Le,es=Es}, _Fail, Bef, St) -> - %% Must clear registers and stack of dead variables. - Int = clear_dead(Bef, Le#l.i, Le#l.vdb), - block_cg(Es, Le, Int, St). - -%% bsm_rename_ctx([Clause], Var) -> [Clause] -%% We know from an annotation that the register for a binary can -%% be reused for the match context because the two are not truly -%% alive at the same time (even though the life time information -%% says so). -%% -%% The easiest way to have those variables share the same register is -%% to rename the variable with the shortest life-span (the match -%% context) to the variable for the binary (which can have a very -%% long life-time because it is locked during matching). We KNOW that -%% the match state variable will only be alive during the matching. -%% -%% We must also remove all information about the match context -%% variable from all life-time information databases (Vdb). - -bsm_rename_ctx([#k_type_clause{type=k_binary,values=Vcs}=TC|Cs], New) -> - [#k_val_clause{val=#k_binary{segs=#k_var{name=Old}}=Bin, - body=Ke0}=VC0] = Vcs, - Ke = bsm_rename_ctx(Ke0, Old, New, false), - VC = VC0#k_val_clause{val=Bin#k_binary{segs=#k_var{name=New}}, - body=Ke}, - [TC#k_type_clause{values=[VC]}|bsm_rename_ctx(Cs, New)]; -bsm_rename_ctx([C|Cs], New) -> - [C|bsm_rename_ctx(Cs, New)]; -bsm_rename_ctx([], _) -> []. - -%% bsm_rename_ctx(Ke, OldName, NewName, InProt) -> Ke' -%% Rename and clear OldName from life-time information. We must -%% recurse into any block contained in a protected, but it would -%% only complicatate things to recurse into blocks not in a protected -%% (the match context variable is not live inside them). - -bsm_rename_ctx(#k_select{var=#k_var{name=V},types=Cs0}=Sel, - Old, New, InProt) -> - Cs = bsm_rename_ctx_list(Cs0, Old, New, InProt), - Sel#k_select{var=#k_var{name=bsm_rename_var(V, Old, New)},types=Cs}; -bsm_rename_ctx(#k_type_clause{values=Cs0}=TC, Old, New, InProt) -> - Cs = bsm_rename_ctx_list(Cs0, Old, New, InProt), - TC#k_type_clause{values=Cs}; -bsm_rename_ctx(#k_val_clause{body=Ke0}=VC, Old, New, InProt) -> - Ke = bsm_rename_ctx(Ke0, Old, New, InProt), - VC#k_val_clause{body=Ke}; -bsm_rename_ctx(#k_alt{first=F0,then=S0}=Alt, Old, New, InProt) -> - F = bsm_rename_ctx(F0, Old, New, InProt), - S = bsm_rename_ctx(S0, Old, New, InProt), - Alt#k_alt{first=F,then=S}; -bsm_rename_ctx(#k_guard{clauses=Gcs0}=Guard, Old, New, InProt) -> - Gcs = bsm_rename_ctx_list(Gcs0, Old, New, InProt), - Guard#k_guard{clauses=Gcs}; -bsm_rename_ctx(#k_guard_clause{guard=G0,body=B0}=GC, Old, New, InProt) -> - G = bsm_rename_ctx(G0, Old, New, InProt), - B = bsm_rename_ctx(B0, Old, New, InProt), - %% A guard clause may cause unsaved variables to be saved on the stack. - %% Since the match state variable Old is an alias for New (uses the - %% same register), it is neither in the stack nor register descriptor - %% lists and we would crash when we didn't find it unless we remove - %% it from the database. - bsm_forget_var(GC#k_guard_clause{guard=G,body=B}, Old); -bsm_rename_ctx(#k_protected{arg=Ts0}=Prot, Old, New, _InProt) -> - InProt = true, - Ts = bsm_rename_ctx_list(Ts0, Old, New, InProt), - bsm_forget_var(Prot#k_protected{arg=Ts}, Old); -bsm_rename_ctx(#k_guard_match{body=Ms0}=Match, Old, New, InProt) -> - Ms = bsm_rename_ctx(Ms0, Old, New, InProt), - Match#k_guard_match{body=Ms}; -bsm_rename_ctx(#k_test{}=Test, _, _, _) -> Test; -bsm_rename_ctx(#k_bif{}=Bif, _, _, _) -> Bif; -bsm_rename_ctx(#k_put{}=Put, _, _, _) -> Put; -bsm_rename_ctx(#k_call{}=Call, _, _, _) -> Call; -bsm_rename_ctx(#cg_block{}=Block, Old, _, false) -> - %% This block is not inside a protected. The match context variable cannot - %% possibly be live inside the block. - bsm_forget_var(Block, Old); -bsm_rename_ctx(#cg_block{es=Es0}=Block, Old, New, true) -> - %% A block in a protected. We must recursively rename the variable - %% inside the block. - Es = bsm_rename_ctx_list(Es0, Old, New, true), - bsm_forget_var(Block#cg_block{es=Es}, Old); -bsm_rename_ctx(#k_guard_break{}=Break, Old, _New, _InProt) -> - bsm_forget_var(Break, Old). - -bsm_rename_ctx_list([C|Cs], Old, New, InProt) -> - [bsm_rename_ctx(C, Old, New, InProt)| - bsm_rename_ctx_list(Cs, Old, New, InProt)]; -bsm_rename_ctx_list([], _, _, _) -> []. - -bsm_rename_var(Old, Old, New) -> New; -bsm_rename_var(V, _, _) -> V. - -%% bsm_forget_var(#l{}, Variable) -> #l{} -%% Remove a variable from the variable life-time database. - -bsm_forget_var(Ke, V) -> - #l{vdb=Vdb} = L0 = get_kanno(Ke), - L = L0#l{vdb=keydelete(V, 1, Vdb)}, - set_kanno(Ke, L). - -%% block_cg([Kexpr], Le, Vdb, StackReg, St) -> {[Ainstr],StackReg,St}. -%% block_cg([Kexpr], Le, StackReg, St) -> {[Ainstr],StackReg,St}. - -block_cg(Es, Le, _Vdb, Bef, St) -> - block_cg(Es, Le, Bef, St). - -block_cg(Es, Le, Bef, #cg{is_top_block=false}=St) -> - cg_block(Es, Le#l.vdb, Bef, St); -block_cg(Es, Le, Bef, #cg{is_top_block=true}=St0) -> - %% No stack frame has been established yet. Do we need one? - case need_stackframe(Es) of - true -> - %% We need a stack frame. Generate the code and add the - %% code for creating and deallocating the stack frame. - {Is0,Aft,St} = cg_block(Es, Le#l.vdb, Bef, - St0#cg{is_top_block=false,need_frame=false}), - Is = top_level_block(Is0, Aft, max_reg(Bef#sr.reg), St), - {Is,Aft,St#cg{is_top_block=true}}; - false -> - %% This sequence of instructions ending in a #k_match{} (a - %% 'case' or 'if') in the Erlang code does not need a - %% stack frame yet. Delay the creation (if a stack frame - %% is needed at all, it will be created inside the - %% #k_match{}). - cg_list(Es, Le#l.vdb, Bef, St0) - end. - -%% need_stackframe([Kexpr]) -> true|false. -%% Does this list of instructions need a stack frame? -%% -%% A sequence of instructions that don't clobber the X registers -%% followed by a single #k_match{} doesn't need a stack frame. - -need_stackframe([H|T]) -> - case H of - #k_bif{op=#k_internal{}} -> true; - #k_put{arg=#k_binary{}} -> true; - #k_bif{} -> need_stackframe(T); - #k_put{} -> need_stackframe(T); - #k_guard_match{} -> need_stackframe(T); - #k_match{} when T =:= [] -> false; - _ -> true - end; -need_stackframe([]) -> false. - -cg_block([], _Vdb, Bef, St0) -> - {[],Bef,St0}; -cg_block(Kes0, Vdb, Bef, St0) -> - {Kes2,Int1,St1} = - case basic_block(Kes0) of - {Kes1,LastI,Args,Rest} -> - cg_basic_block(Kes1, LastI, Args, Vdb, Bef, St0); - {Kes1,Rest} -> - cg_list(Kes1, Vdb, Bef, St0) - end, - {Kes3,Int2,St2} = cg_block(Rest, Vdb, Int1, St1), - {Kes2 ++ Kes3,Int2,St2}. - -basic_block(Kes) -> basic_block(Kes, []). - -basic_block([Ke|Kes], Acc) -> - case collect_block(Ke) of - include -> basic_block(Kes, [Ke|Acc]); - {block_end,As} -> - case Acc of - [] -> - %% If the basic block does not contain any #k_put{} instructions, - %% it serves no useful purpose to do basic block optimizations. - {[Ke],Kes}; - _ -> - #l{i=I} = get_kanno(Ke), - {reverse(Acc, [Ke]),I,As,Kes} - end; - no_block -> {reverse(Acc, [Ke]),Kes} - end. - -collect_block(#k_put{arg=Arg}) -> - %% #k_put{} instructions that may garbage collect are not allowed - %% in basic blocks. - case Arg of - #k_binary{} -> no_block; - #k_map{} -> no_block; - _ -> include - end; -collect_block(#k_call{op=Func,args=As}) -> - {block_end,As++func_vars(Func)}; -collect_block(#k_enter{op=Func,args=As}) -> - {block_end,As++func_vars(Func)}; -collect_block(#k_return{args=Rs}) -> - {block_end,Rs}; -collect_block(#k_break{args=Bs}) -> - {block_end,Bs}; -collect_block(_) -> no_block. - -func_vars(#k_var{}=Var) -> - [Var]; -func_vars(#k_remote{mod=M,name=F}) - when is_record(M, k_var); is_record(F, k_var) -> - [M,F]; -func_vars(_) -> []. - -%% cg_basic_block([Kexpr], FirstI, LastI, Arguments, Vdb, StackReg, State) -> -%% {[Ainstr],StackReg,State}. -%% -%% Do a specialized code generation for a basic block of #put{} -%% instructions (that don't do any garbage collection) followed by a -%% call, break, or return. -%% -%% 'Arguments' is a list of the variables that must be loaded into -%% consecutive X registers before the last instruction in the block. -%% The point of this specialized code generation is to try put the -%% all of the variables in 'Arguments' into the correct X register -%% to begin with, instead of putting them into the first available -%% X register and having to move them to the correct X register -%% later. -%% -%% To achieve that, we attempt to reserve the X registers that the -%% variables in 'Arguments' will need to be in when the block ends. -%% -%% To make it more likely that reservations will be successful, we -%% will try to save variables that need to be saved to the stack as -%% early as possible (if an X register needed by a variable in -%% Arguments is occupied by another variable, the value in the -%% X register can be evicted if it is saved on the stack). -%% -%% We will take care not to increase the size of stack frame compared -%% to what the standard code generator would have done (that is, to -%% save all X registers at the last possible moment). We will do that -%% by extending the stack frame to the minimal size needed to save -%% all that needs to be saved using extend_stack/4, and use -%% save_carefully/4 during code generation to only save the variables -%% that can be saved without growing the stack frame. - -cg_basic_block(Kes, Lf, As, Vdb, Bef, St0) -> - Int0 = reserve_arg_regs(As, Bef), - Int = extend_stack(Int0, Lf, Lf+1, Vdb), - {Keis,{Aft,St1}} = - flatmapfoldl(fun(Ke, St) -> cg_basic_block(Ke, St, Lf, Vdb) end, - {Int,St0}, need_heap(Kes)), - {Keis,Aft,St1}. - -cg_basic_block(#cg_need_heap{}=Ke, {Bef,St0}, _Lf, Vdb) -> - {Keis,Aft,St1} = cg(Ke, Vdb, Bef, St0), - {Keis,{Aft,St1}}; -cg_basic_block(Ke, {Bef,St0}, Lf, Vdb) -> - #l{i=I} = get_kanno(Ke), - - %% Save all we can to increase the possibility that reserving - %% registers will succeed. - {Sis,Int0} = save_carefully(Bef, I, Lf+1, Vdb), - Int1 = reserve(Int0), - {Keis,Aft,St1} = cg(Ke, Vdb, Int1, St0), - {Sis ++ Keis,{Aft,St1}}. - -%% reserve_arg_regs([Argument], Bef) -> Aft. -%% Try to reserve the X registers for all arguments. All registers -%% that we wish to reserve will be saved in Bef#sr.res. - -reserve_arg_regs(As, Bef) -> - Res = reserve_arg_regs_1(As, 0), - reserve(Bef#sr{res=Res}). - -reserve_arg_regs_1([#k_var{name=V}|As], I) -> - [{I,V}|reserve_arg_regs_1(As, I+1)]; -reserve_arg_regs_1([A|As], I) -> - [{I,A}|reserve_arg_regs_1(As, I+1)]; -reserve_arg_regs_1([], _) -> []. - -%% reserve(Bef) -> Aft. -%% Try to reserve more registers. The registers we wish to reserve -%% are found in Bef#sr.res. - -reserve(#sr{reg=Regs,stk=Stk,res=Res}=Sr) -> - Sr#sr{reg=reserve_1(Res, Regs, Stk)}. - -reserve_1([{I,V}|Rs], [free|Regs], Stk) -> - [{reserved,I,V}|reserve_1(Rs, Regs, Stk)]; -reserve_1([{I,V}|Rs], [{I,V}|Regs], Stk) -> - [{I,V}|reserve_1(Rs, Regs, Stk)]; -reserve_1([{I,V}|Rs], [{I,Var}|Regs], Stk) -> - case on_stack(Var, Stk) of - true -> [{reserved,I,V}|reserve_1(Rs, Regs, Stk)]; - false -> [{I,Var}|reserve_1(Rs, Regs, Stk)] - end; -reserve_1([{I,V}|Rs], [{reserved,I,_}|Regs], Stk) -> - [{reserved,I,V}|reserve_1(Rs, Regs, Stk)]; -reserve_1([{I,V}|Rs], [], Stk) -> - [{reserved,I,V}|reserve_1(Rs, [], Stk)]; -reserve_1([], Regs, _) -> Regs. - -%% extend_stack(Bef, FirstBefore, LastFrom, Vdb) -> Aft. -%% Extend the stack enough to fit all variables alive past LastFrom -%% and not already on the stack. - -extend_stack(#sr{stk=Stk0}=Bef, Fb, Lf, Vdb) -> - Stk1 = clear_dead_stk(Stk0, Fb, Vdb), - New = new_not_on_stack(Stk1, Fb, Lf, Vdb), - Stk2 = foldl(fun ({V,_,_}, Stk) -> put_stack(V, Stk) end, Stk1, New), - Stk = Stk0 ++ lists:duplicate(length(Stk2) - length(Stk0), free), - Bef#sr{stk=Stk}. - -%% save_carefully(Bef, FirstBefore, LastFrom, Vdb) -> {[SaveVar],Aft}. -%% Save variables which are used past current point and which are not -%% already on the stack, but only if the variables can be saved without -%% growing the stack frame. - -save_carefully(#sr{stk=Stk}=Bef, Fb, Lf, Vdb) -> - New0 = new_not_on_stack(Stk, Fb, Lf, Vdb), - New = keysort(2, New0), - save_carefully_1(New, Bef, []). - -save_carefully_1([{V,_,_}|Vs], #sr{reg=Regs,stk=Stk0}=Bef, Acc) -> - case put_stack_carefully(V, Stk0) of - error -> - {reverse(Acc),Bef}; - Stk1 -> - SrcReg = fetch_reg(V, Regs), - Move = {move,SrcReg,fetch_stack(V, Stk1)}, - {x,_} = SrcReg, %Assertion - must be X register. - save_carefully_1(Vs, Bef#sr{stk=Stk1}, [Move|Acc]) - end; -save_carefully_1([], Bef, Acc) -> - {reverse(Acc),Bef}. - -%% top_level_block([Instruction], Bef, MaxRegs, St) -> [Instruction]. -%% For the top-level block, allocate a stack frame a necessary, -%% adjust Y register numbering and instructions that return -%% from the function. - -top_level_block(Keis, #sr{stk=[]}, _MaxRegs, #cg{need_frame=false}) -> - Keis; -top_level_block(Keis, Bef, MaxRegs, _St) -> - %% This top block needs an allocate instruction before it, and a - %% deallocate instruction before each return. - FrameSz = length(Bef#sr.stk), - MaxY = FrameSz-1, - Keis1 = flatmap(fun ({call_only,Arity,Func}) -> - [{call_last,Arity,Func,FrameSz}]; - ({call_ext_only,Arity,Func}) -> - [{call_ext_last,Arity,Func,FrameSz}]; - ({apply_only,Arity}) -> - [{apply_last,Arity,FrameSz}]; - (return) -> - [{deallocate,FrameSz},return]; - (Tuple) when is_tuple(Tuple) -> - [turn_yregs(Tuple, MaxY)]; - (Other) -> - [Other] - end, Keis), - [{allocate_zero,FrameSz,MaxRegs}|Keis1]. - -%% turn_yregs(Size, Tuple, MaxY) -> Tuple' -%% Renumber y register so that {y,0} becomes {y,FrameSize-1}, -%% {y,FrameSize-1} becomes {y,0} and so on. This is to make nested -%% catches work. The code generation algorithm gives a lower register -%% number to the outer catch, which is wrong. - -turn_yregs({call,_,_}=I, _MaxY) -> I; -turn_yregs({call_ext,_,_}=I, _MaxY) -> I; -turn_yregs({jump,_}=I, _MaxY) -> I; -turn_yregs({label,_}=I, _MaxY) -> I; -turn_yregs({line,_}=I, _MaxY) -> I; -turn_yregs({test_heap,_,_}=I, _MaxY) -> I; -turn_yregs({bif,Op,F,A,B}, MaxY) -> - {bif,Op,F,turn_yreg(A, MaxY),turn_yreg(B, MaxY)}; -turn_yregs({gc_bif,Op,F,Live,A,B}, MaxY) when is_integer(Live) -> - {gc_bif,Op,F,Live,turn_yreg(A, MaxY),turn_yreg(B, MaxY)}; -turn_yregs({get_tuple_element,S,N,D}, MaxY) -> - {get_tuple_element,turn_yreg(S, MaxY),N,turn_yreg(D, MaxY)}; -turn_yregs({put_tuple,Arity,D}, MaxY) -> - {put_tuple,Arity,turn_yreg(D, MaxY)}; -turn_yregs({select_val,R,F,L}, MaxY) -> - {select_val,turn_yreg(R, MaxY),F,L}; -turn_yregs({test,Op,F,L}, MaxY) -> - {test,Op,F,turn_yreg(L, MaxY)}; -turn_yregs({test,Op,F,Live,A,B}, MaxY) when is_integer(Live) -> - {test,Op,F,Live,turn_yreg(A, MaxY),turn_yreg(B, MaxY)}; -turn_yregs({Op,A}, MaxY) -> - {Op,turn_yreg(A, MaxY)}; -turn_yregs({Op,A,B}, MaxY) -> - {Op,turn_yreg(A, MaxY),turn_yreg(B, MaxY)}; -turn_yregs({Op,A,B,C}, MaxY) -> - {Op,turn_yreg(A, MaxY),turn_yreg(B, MaxY),turn_yreg(C, MaxY)}; -turn_yregs(Tuple, MaxY) -> - turn_yregs(tuple_size(Tuple), Tuple, MaxY). - -turn_yregs(1, Tp, _) -> - Tp; -turn_yregs(N, Tp, MaxY) -> - E = turn_yreg(element(N, Tp), MaxY), - turn_yregs(N-1, setelement(N, Tp, E), MaxY). - -turn_yreg({yy,YY}, MaxY) -> - {y,MaxY-YY}; -turn_yreg({list,Ls},MaxY) -> - {list,turn_yreg(Ls, MaxY)}; -turn_yreg([_|_]=Ts, MaxY) -> - [turn_yreg(T, MaxY) || T <- Ts]; -turn_yreg(Other, _MaxY) -> - Other. - -%% select_cg(Sclause, V, TypeFail, ValueFail, StackReg, State) -> -%% {Is,StackReg,State}. -%% Selecting type and value needs two failure labels, TypeFail is the -%% label to jump to of the next type test when this type fails, and -%% ValueFail is the label when this type is correct but the value is -%% wrong. These are different as in the second case there is no need -%% to try the next type, it will always fail. - -select_cg(#k_type_clause{type=Type,values=Vs}, Var, Tf, Vf, Bef, St) -> - #k_var{name=V} = Var, - select_cg(Type, Vs, V, Tf, Vf, Bef, St). - -select_cg(k_cons, [S], V, Tf, Vf, Bef, St) -> - select_cons(S, V, Tf, Vf, Bef, St); -select_cg(k_nil, [S], V, Tf, Vf, Bef, St) -> - select_nil(S, V, Tf, Vf, Bef, St); -select_cg(k_binary, [S], V, Tf, Vf, Bef, St) -> - select_binary(S, V, Tf, Vf, Bef, St); -select_cg(k_bin_seg, S, V, Tf, _Vf, Bef, St) -> - select_bin_segs(S, V, Tf, Bef, St); -select_cg(k_bin_int, S, V, Tf, _Vf, Bef, St) -> - select_bin_segs(S, V, Tf, Bef, St); -select_cg(k_bin_end, [S], V, Tf, _Vf, Bef, St) -> - select_bin_end(S, V, Tf, Bef, St); -select_cg(k_map, S, V, Tf, Vf, Bef, St) -> - select_map(S, V, Tf, Vf, Bef, St); -select_cg(k_literal, S, V, Tf, Vf, Bef, St) -> - select_literal(S, V, Tf, Vf, Bef, St); -select_cg(Type, Scs, V, Tf, Vf, Bef, St0) -> - {Vis,{Aft,St1}} = - mapfoldl(fun (S, {Int,Sta}) -> - {Val,Is,Inta,Stb} = select_val(S, V, Vf, Bef, Sta), - {{Is,[Val]},{sr_merge(Int, Inta),Stb}} - end, {void,St0}, Scs), - OptVls = combine(lists:sort(combine(Vis))), - {Vls,Sis,St2} = select_labels(OptVls, St1, [], []), - {select_val_cg(Type, fetch_var(V, Bef), Vls, Tf, Vf, Sis), Aft, St2}. - -select_val_cg(k_tuple, R, [Arity,{f,Lbl}], Tf, Vf, [{label,Lbl}|Sis]) -> - [{test,is_tuple,{f,Tf},[R]},{test,test_arity,{f,Vf},[R,Arity]}|Sis]; -select_val_cg(k_tuple, R, Vls, Tf, Vf, Sis) -> - [{test,is_tuple,{f,Tf},[R]},{select_tuple_arity,R,{f,Vf},{list,Vls}}|Sis]; -select_val_cg(Type, R, [Val, {f,Lbl}], Fail, Fail, [{label,Lbl}|Sis]) -> - [{test,is_eq_exact,{f,Fail},[R,{type(Type),Val}]}|Sis]; -select_val_cg(Type, R, [Val, {f,Lbl}], Tf, Vf, [{label,Lbl}|Sis]) -> - [{test,select_type_test(Type),{f,Tf},[R]}, - {test,is_eq_exact,{f,Vf},[R,{type(Type),Val}]}|Sis]; -select_val_cg(Type, R, Vls0, Tf, Vf, Sis) -> - Vls1 = [case Value of - {f,_Lbl} -> Value; - _ -> {type(Type),Value} - end || Value <- Vls0], - [{test,select_type_test(Type),{f,Tf},[R]}, {select_val,R,{f,Vf},{list,Vls1}}|Sis]. - -type(k_atom) -> atom; -type(k_float) -> float; -type(k_int) -> integer. - -select_type_test(k_int) -> is_integer; -select_type_test(k_atom) -> is_atom; -select_type_test(k_float) -> is_float. - -combine([{Is,Vs1}, {Is,Vs2}|Vis]) -> combine([{Is,Vs1 ++ Vs2}|Vis]); -combine([V|Vis]) -> [V|combine(Vis)]; -combine([]) -> []. - -select_labels([{Is,Vs}|Vis], St0, Vls, Sis) -> - {Lbl,St1} = new_label(St0), - select_labels(Vis, St1, add_vls(Vs, Lbl, Vls), [[{label,Lbl}|Is]|Sis]); -select_labels([], St, Vls, Sis) -> - {Vls,append(Sis),St}. - -add_vls([V|Vs], Lbl, Acc) -> - add_vls(Vs, Lbl, [V, {f,Lbl}|Acc]); -add_vls([], _, Acc) -> Acc. - -select_literal(S, V, Tf, Vf, Bef, St) -> - Reg = fetch_var(V, Bef), - F = fun(ValClause, Fail, St0) -> - {Val,Is,Aft,St1} = select_val(ValClause, V, Vf, Bef, St0), - Test = {test,is_eq_exact,{f,Fail},[Reg,{literal,Val}]}, - {[Test|Is],Aft,St1} - end, - match_fmf(F, Tf, St, S). - -select_cons(#k_val_clause{val=#k_cons{hd=Hd,tl=Tl},body=B,anno=#l{i=I,vdb=Vdb}}, - V, Tf, Vf, Bef, St0) -> - Es = [Hd,Tl], - {Eis,Int,St1} = select_extract_cons(V, Es, I, Vdb, Bef, St0), - {Bis,Aft,St2} = match_cg(B, Vf, Int, St1), - {[{test,is_nonempty_list,{f,Tf},[fetch_var(V, Bef)]}] ++ Eis ++ Bis,Aft,St2}. - -select_nil(#k_val_clause{val=#k_nil{},body=B}, V, Tf, Vf, Bef, St0) -> - {Bis,Aft,St1} = match_cg(B, Vf, Bef, St0), - {[{test,is_nil,{f,Tf},[fetch_var(V, Bef)]}] ++ Bis,Aft,St1}. - -select_binary(#k_val_clause{val=#k_binary{segs=#k_var{name=V}},body=B, - anno=#l{i=I,vdb=Vdb}}, V, Tf, Vf, Bef, St0) -> - #cg{ctx=OldCtx} = St0, - Int0 = clear_dead(Bef#sr{reg=Bef#sr.reg}, I, Vdb), - {Bis0,Aft,St1} = match_cg(B, Vf, Int0, St0#cg{ctx=V}), - CtxReg = fetch_var(V, Int0), - Live = max_reg(Bef#sr.reg), - Bis1 = [{test,bs_start_match2,{f,Tf},Live,[CtxReg,{context,V}],CtxReg}, - {bs_save2,CtxReg,{V,V}}|Bis0], - Bis = finish_select_binary(Bis1), - {Bis,Aft,St1#cg{ctx=OldCtx}}; -select_binary(#k_val_clause{val=#k_binary{segs=#k_var{name=Ivar}},body=B, - anno=#l{i=I,vdb=Vdb}}, V, Tf, Vf, Bef, St0) -> - #cg{ctx=OldCtx} = St0, - Regs = put_reg(Ivar, Bef#sr.reg), - Int0 = clear_dead(Bef#sr{reg=Regs}, I, Vdb), - {Bis0,Aft,St1} = match_cg(B, Vf, Int0, St0#cg{ctx=Ivar}), - CtxReg = fetch_var(Ivar, Int0), - Live = max_reg(Bef#sr.reg), - Bis1 = [{test,bs_start_match2,{f,Tf},Live, - [fetch_var(V, Bef),{context,Ivar}],CtxReg}, - {bs_save2,CtxReg,{Ivar,Ivar}}|Bis0], - Bis = finish_select_binary(Bis1), - {Bis,Aft,St1#cg{ctx=OldCtx}}. - -finish_select_binary([{bs_save2,R,Point}=I,{bs_restore2,R,Point}|Is]) -> - [I|finish_select_binary(Is)]; -finish_select_binary([{bs_save2,R,Point}=I,{test,is_eq_exact,_,_}=Test, - {bs_restore2,R,Point}|Is]) -> - [I,Test|finish_select_binary(Is)]; -finish_select_binary([{test,bs_match_string,F,[Ctx,BinList]}|Is]) - when is_list(BinList) -> - I = {test,bs_match_string,F,[Ctx,list_to_bitstring(BinList)]}, - [I|finish_select_binary(Is)]; -finish_select_binary([I|Is]) -> - [I|finish_select_binary(Is)]; -finish_select_binary([]) -> []. - -%% New instructions for selection of binary segments. - -select_bin_segs(Scs, Ivar, Tf, Bef, St) -> - match_fmf(fun(S, Fail, Sta) -> - select_bin_seg(S, Ivar, Fail, Bef, Sta) end, - Tf, St, Scs). - -select_bin_seg(#k_val_clause{val=#k_bin_seg{size=Size,unit=U,type=T, - seg=Seg,flags=Fs0,next=Next}, - body=B, - anno=#l{i=I,vdb=Vdb,a=A}}, Ivar, Fail, Bef, St0) -> - Ctx = St0#cg.ctx, - Fs = [{anno,A}|Fs0], - Es = case Next of - [] -> [Seg]; - _ -> [Seg,Next] - end, - {Mis,Int,St1} = select_extract_bin(Es, Size, U, T, Fs, Fail, - I, Vdb, Bef, Ctx, B, St0), - {Bis,Aft,St2} = match_cg(B, Fail, Int, St1), - CtxReg = fetch_var(Ctx, Bef), - Is = if - Mis =:= [] -> - %% No bs_restore2 instruction needed if no match instructions. - Bis; - true -> - [{bs_restore2,CtxReg,{Ctx,Ivar}}|Mis++Bis] - end, - {Is,Aft,St2}; -select_bin_seg(#k_val_clause{val=#k_bin_int{size=Sz,unit=U,flags=Fs, - val=Val,next=Next}, - body=B, - anno=#l{i=I,vdb=Vdb}}, Ivar, Fail, Bef, St0) -> - Ctx = St0#cg.ctx, - {Mis,Int,St1} = select_extract_int(Next, Val, Sz, U, Fs, Fail, - I, Vdb, Bef, Ctx, St0), - {Bis,Aft,St2} = match_cg(B, Fail, Int, St1), - CtxReg = fetch_var(Ctx, Bef), - Is = case Mis ++ Bis of - [{test,bs_match_string,F,[OtherCtx,Bin1]}, - {bs_save2,OtherCtx,_}, - {bs_restore2,OtherCtx,_}, - {test,bs_match_string,F,[OtherCtx,Bin2]}|Is0] -> - %% We used to do this optimization later, but it - %% turns out that in huge functions with many - %% bs_match_string instructions, it's a big win - %% to do the combination now. To avoid copying the - %% binary data again and again, we'll combine bitstrings - %% in a list and convert all of it to a bitstring later. - [{test,bs_match_string,F,[OtherCtx,[Bin1,Bin2]]}|Is0]; - Is0 -> - Is0 - end, - {[{bs_restore2,CtxReg,{Ctx,Ivar}}|Is],Aft,St2}. - -select_extract_int(#k_var{name=Tl}, Val, #k_int{val=Sz}, U, Fs, Vf, - I, Vdb, Bef, Ctx, St) -> - Bits = U*Sz, - Bin = case member(big, Fs) of - true -> - <>; - false -> - true = member(little, Fs), %Assertion. - <> - end, - Bits = bit_size(Bin), %Assertion. - CtxReg = fetch_var(Ctx, Bef), - Is = if - Bits =:= 0 -> - [{bs_save2,CtxReg,{Ctx,Tl}}]; - true -> - [{test,bs_match_string,{f,Vf},[CtxReg,Bin]}, - {bs_save2,CtxReg,{Ctx,Tl}}] - end, - {Is,clear_dead(Bef, I, Vdb),St}. - -select_extract_bin([#k_var{name=Hd},#k_var{name=Tl}], Size0, Unit, Type, Flags, Vf, - I, Vdb, Bef, Ctx, _Body, St) -> - SizeReg = get_bin_size_reg(Size0, Bef), - {Es,Aft} = - case vdb_find(Hd, Vdb) of - {_,_,Lhd} when Lhd =< I -> - %% The extracted value will not be used. - CtxReg = fetch_var(Ctx, Bef), - Live = max_reg(Bef#sr.reg), - Skip = build_skip_instr(Type, Vf, CtxReg, Live, - SizeReg, Unit, Flags), - {[Skip,{bs_save2,CtxReg,{Ctx,Tl}}],Bef}; - {_,_,_} -> - Reg = put_reg(Hd, Bef#sr.reg), - Int1 = Bef#sr{reg=Reg}, - Rhd = fetch_reg(Hd, Reg), - CtxReg = fetch_reg(Ctx, Reg), - Live = max_reg(Bef#sr.reg), - {[build_bs_instr(Type, Vf, CtxReg, Live, SizeReg, - Unit, Flags, Rhd), - {bs_save2,CtxReg,{Ctx,Tl}}],Int1} - end, - {Es,clear_dead(Aft, I, Vdb),St}; -select_extract_bin([#k_var{name=Hd}], Size, Unit, binary, Flags, Vf, - I, Vdb, Bef, Ctx, Body, St) -> - %% Match the last segment of a binary. We KNOW that the size - %% must be 'all'. - #k_atom{val=all} = Size, %Assertion. - {Es,Aft} = - case vdb_find(Hd, Vdb) of - {_,_,Lhd} when Lhd =< I -> - %% The result will not be used. Furthermore, since we - %% we are at the end of the binary, the position will - %% not be used again; thus, it is safe to do a cheaper - %% test of the unit. - CtxReg = fetch_var(Ctx, Bef), - {case Unit of - 1 -> - []; - _ -> - [{test,bs_test_unit,{f,Vf},[CtxReg,Unit]}] - end,Bef}; - {_,_,_} -> - case is_context_unused(Body) of - false -> - Reg = put_reg(Hd, Bef#sr.reg), - Int1 = Bef#sr{reg=Reg}, - Rhd = fetch_reg(Hd, Reg), - CtxReg = fetch_reg(Ctx, Reg), - Name = bs_get_binary2, - Live = max_reg(Bef#sr.reg), - {[{test,Name,{f,Vf},Live, - [CtxReg,atomic(Size),Unit,{field_flags,Flags}],Rhd}], - Int1}; - true -> - %% Since the matching context will not be used again, - %% we can reuse its register. Reusing the register - %% opens some interesting optimizations in the - %% run-time system. - - Reg0 = Bef#sr.reg, - CtxReg = fetch_reg(Ctx, Reg0), - Reg = replace_reg_contents(Ctx, Hd, Reg0), - Int1 = Bef#sr{reg=Reg}, - Name = bs_get_binary2, - Live = max_reg(Int1#sr.reg), - {[{test,Name,{f,Vf},Live, - [CtxReg,atomic(Size),Unit,{field_flags,Flags}],CtxReg}], - Int1} - end - end, - {Es,clear_dead(Aft, I, Vdb),St}. - -%% is_context_unused(Ke) -> true | false -%% Simple heurististic to determine whether the code that follows -%% will use the current matching context again. (The liveness -%% information is too conservative to be useful for this purpose.) -%% 'true' means that the code that follows will definitely not use -%% the context again (because it is a block, not guard or matching -%% code); 'false' that we are not sure (there could be more -%% matching). - -is_context_unused(#k_alt{then=Then}) -> - %% #k_alt{} can be used for different purposes. If the Then part - %% is a block, it means that matching has finished and is used for a guard - %% to choose between the matched clauses. - is_context_unused(Then); -is_context_unused(#cg_block{}) -> - true; -is_context_unused(_) -> - false. - -select_bin_end(#k_val_clause{val=#k_bin_end{},body=B}, Ivar, Tf, Bef, St0) -> - Ctx = St0#cg.ctx, - {Bis,Aft,St2} = match_cg(B, Tf, Bef, St0), - CtxReg = fetch_var(Ctx, Bef), - {[{bs_restore2,CtxReg,{Ctx,Ivar}}, - {test,bs_test_tail2,{f,Tf},[CtxReg,0]}|Bis],Aft,St2}. - -get_bin_size_reg(#k_var{name=V}, Bef) -> - fetch_var(V, Bef); -get_bin_size_reg(Literal, _Bef) -> - atomic(Literal). - -build_bs_instr(Type, Vf, CtxReg, Live, SizeReg, Unit, Flags, Rhd) -> - {Format,Name} = case Type of - integer -> {plain,bs_get_integer2}; - float -> {plain,bs_get_float2}; - binary -> {plain,bs_get_binary2}; - utf8 -> {utf,bs_get_utf8}; - utf16 -> {utf,bs_get_utf16}; - utf32 -> {utf,bs_get_utf32} - end, - case Format of - plain -> - {test,Name,{f,Vf},Live, - [CtxReg,SizeReg,Unit,{field_flags,Flags}],Rhd}; - utf -> - {test,Name,{f,Vf},Live, - [CtxReg,{field_flags,Flags}],Rhd} - end. - -build_skip_instr(Type, Vf, CtxReg, Live, SizeReg, Unit, Flags) -> - {Format,Name} = case Type of - utf8 -> {utf,bs_skip_utf8}; - utf16 -> {utf,bs_skip_utf16}; - utf32 -> {utf,bs_skip_utf32}; - _ -> {plain,bs_skip_bits2} - end, - case Format of - plain -> - {test,Name,{f,Vf},[CtxReg,SizeReg,Unit,{field_flags,Flags}]}; - utf -> - {test,Name,{f,Vf},[CtxReg,Live,{field_flags,Flags}]} - end. - -select_val(#k_val_clause{val=#k_tuple{es=Es},body=B,anno=#l{i=I,vdb=Vdb}}, - V, Vf, Bef, St0) -> - {Eis,Int,St1} = select_extract_tuple(V, Es, I, Vdb, Bef, St0), - {Bis,Aft,St2} = match_cg(B, Vf, Int, St1), - {length(Es),Eis ++ Bis,Aft,St2}; -select_val(#k_val_clause{val=Val0,body=B}, _V, Vf, Bef, St0) -> - Val = case Val0 of - #k_atom{val=Lit} -> Lit; - #k_float{val=Lit} -> Lit; - #k_int{val=Lit} -> Lit; - #k_literal{val=Lit} -> Lit - end, - {Bis,Aft,St1} = match_cg(B, Vf, Bef, St0), - {Val,Bis,Aft,St1}. - -%% select_extract_tuple(Src, [V], I, Vdb, StackReg, State) -> -%% {[E],StackReg,State}. -%% Extract tuple elements, but only if they do not immediately die. - -select_extract_tuple(Src, Vs, I, Vdb, Bef, St) -> - F = fun (#k_var{name=V}, {Int0,Elem}) -> - case vdb_find(V, Vdb) of - {V,_,L} when L =< I -> {[], {Int0,Elem+1}}; - _Other -> - Reg1 = put_reg(V, Int0#sr.reg), - Int1 = Int0#sr{reg=Reg1}, - Rsrc = fetch_var(Src, Int1), - {[{get_tuple_element,Rsrc,Elem,fetch_reg(V, Reg1)}], - {Int1,Elem+1}} - end - end, - {Es,{Aft,_}} = flatmapfoldl(F, {Bef,0}, Vs), - {Es,Aft,St}. - -select_map(Scs, V, Tf, Vf, Bef, St0) -> - Reg = fetch_var(V, Bef), - {Is,Aft,St1} = - match_fmf(fun(#k_val_clause{val=#k_map{op=exact,es=Es}, - body=B,anno=#l{i=I,vdb=Vdb}}, Fail, St1) -> - select_map_val(V, Es, B, Fail, I, Vdb, Bef, St1) - end, Vf, St0, Scs), - {[{test,is_map,{f,Tf},[Reg]}|Is],Aft,St1}. - -select_map_val(V, Es, B, Fail, I, Vdb, Bef, St0) -> - {Eis,Int,St1} = select_extract_map(V, Es, Fail, I, Vdb, Bef, St0), - {Bis,Aft,St2} = match_cg(B, Fail, Int, St1), - {Eis++Bis,Aft,St2}. - -select_extract_map(_, [], _, _, _, Bef, St) -> {[],Bef,St}; -select_extract_map(Src, Vs, Fail, I, Vdb, Bef, St) -> - %% First split the instruction flow - %% We want one set of each - %% 1) has_map_fields (no target registers) - %% 2) get_map_elements (with target registers) - %% Assume keys are term-sorted - Rsrc = fetch_var(Src, Bef), - - {{HasKs,GetVs,HasVarKs,GetVarVs},Aft} = - foldr(fun(#k_map_pair{key=#k_var{name=K},val=#k_var{name=V}}, - {{HasKsi,GetVsi,HasVarVsi,GetVarVsi},Int0}) -> - case vdb_find(V, Vdb) of - {V,_,L} when L =< I -> - RK = fetch_var(K,Int0), - {{HasKsi,GetVsi,[RK|HasVarVsi],GetVarVsi},Int0}; - _Other -> - Reg1 = put_reg(V, Int0#sr.reg), - Int1 = Int0#sr{reg=Reg1}, - RK = fetch_var(K,Int0), - RV = fetch_reg(V,Reg1), - {{HasKsi,GetVsi,HasVarVsi,[[RK,RV]|GetVarVsi]},Int1} - end; - (#k_map_pair{key=Key,val=#k_var{name=V}}, - {{HasKsi,GetVsi,HasVarVsi,GetVarVsi},Int0}) -> - case vdb_find(V, Vdb) of - {V,_,L} when L =< I -> - {{[atomic(Key)|HasKsi],GetVsi,HasVarVsi,GetVarVsi},Int0}; - _Other -> - Reg1 = put_reg(V, Int0#sr.reg), - Int1 = Int0#sr{reg=Reg1}, - {{HasKsi,[atomic(Key),fetch_reg(V, Reg1)|GetVsi], - HasVarVsi,GetVarVsi},Int1} - end - end, {{[],[],[],[]},Bef}, Vs), - - Code = [{test,has_map_fields,{f,Fail},Rsrc,{list,HasKs}} || HasKs =/= []] ++ - [{test,has_map_fields,{f,Fail},Rsrc,{list,[K]}} || K <- HasVarKs] ++ - [{get_map_elements, {f,Fail},Rsrc,{list,GetVs}} || GetVs =/= []] ++ - [{get_map_elements, {f,Fail},Rsrc,{list,[K,V]}} || [K,V] <- GetVarVs], - {Code, Aft, St}. - - -select_extract_cons(Src, [#k_var{name=Hd},#k_var{name=Tl}], I, Vdb, Bef, St) -> - Rsrc = fetch_var(Src, Bef), - Int = clear_dead(Bef, I, Vdb), - {{_,_,Lhd},{_,_,Ltl}} = {vdb_find(Hd, Vdb),vdb_find(Tl, Vdb)}, - case {Lhd =< I, Ltl =< I} of - {true,true} -> - %% Both dead. - {[],Bef,St}; - {true,false} -> - %% Head dead. - Reg0 = put_reg(Tl, Bef#sr.reg), - Aft = Int#sr{reg=Reg0}, - Rtl = fetch_reg(Tl, Reg0), - {[{get_tl,Rsrc,Rtl}],Aft,St}; - {false,true} -> - %% Tail dead. - Reg0 = put_reg(Hd, Bef#sr.reg), - Aft = Int#sr{reg=Reg0}, - Rhd = fetch_reg(Hd, Reg0), - {[{get_hd,Rsrc,Rhd}],Aft,St}; - {false,false} -> - %% Both used. - Reg0 = put_reg(Tl, put_reg(Hd, Bef#sr.reg)), - Aft = Bef#sr{reg=Reg0}, - Rhd = fetch_reg(Hd, Reg0), - Rtl = fetch_reg(Tl, Reg0), - {[{get_hd,Rsrc,Rhd},{get_tl,Rsrc,Rtl}],Aft,St} - end. - -guard_clause_cg(#k_guard_clause{anno=#l{vdb=Vdb},guard=G,body=B}, Fail, Bef, St0) -> - {Gis,Int,St1} = guard_cg(G, Fail, Vdb, Bef, St0), - {Bis,Aft,St} = match_cg(B, Fail, Int, St1), - {Gis ++ Bis,Aft,St}. - -%% guard_cg(Guard, Fail, Vdb, StackReg, State) -> -%% {[Ainstr],StackReg,State}. -%% A guard is a boolean expression of tests. Tests return true or -%% false. A fault in a test causes the test to return false. Tests -%% never return the boolean, instead we generate jump code to go to -%% the correct exit point. Primops and tests all go to the next -%% instruction on success or jump to a failure label. - -guard_cg(#k_protected{arg=Ts,ret=Rs,anno=#l{vdb=Pdb}}, Fail, _Vdb, Bef, St) -> - protected_cg(Ts, Rs, Fail, Pdb, Bef, St); -guard_cg(#k_test{anno=#l{i=I},op=Test0,args=As,inverted=Inverted}, - Fail, Vdb, Bef, St0) -> - #k_remote{mod=#k_atom{val=erlang},name=#k_atom{val=Test}} = Test0, - case Inverted of - false -> - test_cg(Test, As, Fail, I, Vdb, Bef, St0); - true -> - {Psucc,St1} = new_label(St0), - {Is,Aft,St2} = test_cg(Test, As, Psucc, I, Vdb, Bef, St1), - {Is++[{jump,{f,Fail}},{label,Psucc}],Aft,St2} - end; -guard_cg(G, _Fail, Vdb, Bef, St) -> - %%ok = io:fwrite("cg ~w: ~p~n", [?LINE,{G,Fail,Vdb,Bef}]), - {Gis,Aft,St1} = cg(G, Vdb, Bef, St), - %%ok = io:fwrite("cg ~w: ~p~n", [?LINE,{Aft}]), - {Gis,Aft,St1}. - -%% guard_cg_list([Kexpr], Fail, I, Vdb, StackReg, St) -> -%% {[Ainstr],StackReg,St}. - -guard_cg_list(Kes, Fail, Vdb, Bef, St0) -> - {Keis,{Aft,St1}} = - flatmapfoldl(fun (Ke, {Inta,Sta}) -> - {Keis,Intb,Stb} = - guard_cg(Ke, Fail, Vdb, Inta, Sta), - {Keis,{Intb,Stb}} - end, {Bef,St0}, need_heap(Kes)), - {Keis,Aft,St1}. - -%% protected_cg([Kexpr], [Ret], Fail, I, Vdb, Bef, St) -> {[Ainstr],Aft,St}. -%% Do a protected. Protecteds without return values are just done -%% for effect, the return value is not checked, success passes on to -%% the next instruction and failure jumps to Fail. If there are -%% return values then these must be set to 'false' on failure, -%% control always passes to the next instruction. - -protected_cg(Ts, [], Fail, Vdb, Bef, St0) -> - %% Protect these calls, revert when done. - {Tis,Aft,St1} = guard_cg_list(Ts, Fail, Vdb, Bef, St0#cg{bfail=Fail}), - {Tis,Aft,St1#cg{bfail=St0#cg.bfail}}; -protected_cg(Ts, Rs, _Fail, Vdb, Bef, St0) -> - {Pfail,St1} = new_label(St0), - {Psucc,St2} = new_label(St1), - {Tis,Aft,St3} = guard_cg_list(Ts, Pfail, Vdb, Bef, - St2#cg{bfail=Pfail}), - %%ok = io:fwrite("cg ~w: ~p~n", [?LINE,{Rs,I,Vdb,Aft}]), - %% Set return values to false. - Mis = [{move,{atom,false},fetch_var(V,Aft)}||#k_var{name=V} <- Rs], - {Tis ++ [{jump,{f,Psucc}}, - {label,Pfail}] ++ Mis ++ [{label,Psucc}], - Aft,St3#cg{bfail=St0#cg.bfail}}. - -%% test_cg(TestName, Args, Fail, I, Vdb, Bef, St) -> {[Ainstr],Aft,St}. -%% Generate test instruction. Use explicit fail label here. - -test_cg(is_map, [A], Fail, I, Vdb, Bef, St) -> - %% We must avoid creating code like this: - %% - %% move x(0) y(0) - %% is_map Fail [x(0)] - %% make_fun => x(0) %% Overwrite x(0) - %% put_map_assoc y(0) ... - %% - %% The code is safe, but beam_validator does not understand that. - %% Extending beam_validator to handle such (rare) code as the - %% above would make it slower for all programs. Instead, change - %% the code generator to always prefer the Y register for is_map() - %% and put_map_assoc() instructions, ensuring that they use the - %% same register. - Arg = cg_reg_arg_prefer_y(A, Bef), - Aft = clear_dead(Bef, I, Vdb), - {[{test,is_map,{f,Fail},[Arg]}],Aft,St}; -test_cg(is_boolean, [#k_atom{val=Val}], Fail, I, Vdb, Bef, St) -> - Aft = clear_dead(Bef, I, Vdb), - Is = case is_boolean(Val) of - true -> []; - false -> [{jump,{f,Fail}}] - end, - {Is,Aft,St}; -test_cg(Test, As, Fail, I, Vdb, Bef, St) -> - Args = cg_reg_args(As, Bef), - Aft = clear_dead(Bef, I, Vdb), - {[beam_utils:bif_to_test(Test, Args, {f,Fail})],Aft,St}. - -%% match_fmf(Fun, LastFail, State, [Clause]) -> {Is,Aft,State}. -%% This is a special flatmapfoldl for match code gen where we -%% generate a "failure" label for each clause. The last clause uses -%% an externally generated failure label, LastFail. N.B. We do not -%% know or care how the failure labels are used. - -match_fmf(F, LastFail, St, [H]) -> - F(H, LastFail, St); -match_fmf(F, LastFail, St0, [H|T]) -> - {Fail,St1} = new_label(St0), - {R,Aft1,St2} = F(H, Fail, St1), - {Rs,Aft2,St3} = match_fmf(F, LastFail, St2, T), - {R ++ [{label,Fail}] ++ Rs,sr_merge(Aft1, Aft2),St3}. - -%% call_cg(Func, [Arg], [Ret], Le, Vdb, StackReg, State) -> -%% {[Ainstr],StackReg,State}. -%% enter_cg(Func, [Arg], Le, Vdb, Bef, St) -> {[Ainstr],Aft,St}. -%% Call and enter first put the arguments into registers and save any -%% other registers, then clean up and compress the stack and set the -%% frame size. Finally the actual call is made. Call then needs the -%% return values filled in. - -call_cg(#k_var{}=Var, As, Rs, Le, Vdb, Bef, St0) -> - {Sis,Int} = cg_setup_call(As++[Var], Bef, Le#l.i, Vdb), - %% Put return values in registers. - Reg = load_vars(Rs, clear_regs(Int#sr.reg)), - %% Build complete code and final stack/register state. - Arity = length(As), - {Frees,Aft} = free_dead(clear_dead(Int#sr{reg=Reg}, Le#l.i, Vdb)), - {Sis ++ Frees ++ [line(Le),{call_fun,Arity}],Aft, - need_stack_frame(St0)}; -call_cg(#k_remote{mod=Mod,name=Name}, As, Rs, Le, Vdb, Bef, St0) - when is_record(Mod, k_var); is_record(Name, k_var) -> - {Sis,Int} = cg_setup_call(As++[Mod,Name], Bef, Le#l.i, Vdb), - %% Put return values in registers. - Reg = load_vars(Rs, clear_regs(Int#sr.reg)), - %% Build complete code and final stack/register state. - Arity = length(As), - St = need_stack_frame(St0), - %%{Call,St1} = build_call(Func, Arity, St0), - {Frees,Aft} = free_dead(clear_dead(Int#sr{reg=Reg}, Le#l.i, Vdb)), - {Sis ++ Frees ++ [line(Le),{apply,Arity}],Aft,St}; -call_cg(Func, As, Rs, Le, Vdb, Bef, St0) -> - case St0 of - #cg{bfail=Fail} when Fail =/= 0 -> - %% Inside a guard. The only allowed function call is to - %% erlang:error/1,2. We will generate the following code: - %% - %% move {atom,ok} DestReg - %% jump FailureLabel - #k_remote{mod=#k_atom{val=erlang}, - name=#k_atom{val=error}} = Func, %Assertion. - [#k_var{name=DestVar}] = Rs, - Int0 = clear_dead(Bef, Le#l.i, Vdb), - Reg = put_reg(DestVar, Int0#sr.reg), - Int = Int0#sr{reg=Reg}, - Dst = fetch_reg(DestVar, Reg), - {[{move,{atom,ok},Dst},{jump,{f,Fail}}], - clear_dead(Int, Le#l.i, Vdb),St0}; - #cg{} -> - %% Ordinary function call in a function body. - {Sis,Int} = cg_setup_call(As, Bef, Le#l.i, Vdb), - %% Put return values in registers. - Reg = load_vars(Rs, clear_regs(Int#sr.reg)), - %% Build complete code and final stack/register state. - Arity = length(As), - {Call,St1} = build_call(Func, Arity, St0), - {Frees,Aft} = free_dead(clear_dead(Int#sr{reg=Reg}, Le#l.i, Vdb)), - {Sis ++ Frees ++ [line(Le)|Call],Aft,St1} - end. - -build_call(#k_remote{mod=#k_atom{val=erlang},name=#k_atom{val='!'}}, 2, St0) -> - {[send],need_stack_frame(St0)}; -build_call(#k_remote{mod=#k_atom{val=Mod},name=#k_atom{val=Name}}, Arity, St0) -> - {[{call_ext,Arity,{extfunc,Mod,Name,Arity}}],need_stack_frame(St0)}; -build_call(#k_local{name=Name}, Arity, St0) when is_atom(Name) -> - {Lbl,St1} = local_func_label(Name, Arity, need_stack_frame(St0)), - {[{call,Arity,{f,Lbl}}],St1}. - -free_dead(#sr{stk=Stk0}=Aft) -> - {Instr,Stk} = free_dead(Stk0, 0, [], []), - {Instr,Aft#sr{stk=Stk}}. - -free_dead([dead|Stk], Y, Instr, StkAcc) -> - %% Note: kill/1 is equivalent to init/1 (translated by beam_asm). - %% We use kill/1 to help further optimisation passes. - free_dead(Stk, Y+1, [{kill,{yy,Y}}|Instr], [free|StkAcc]); -free_dead([Any|Stk], Y, Instr, StkAcc) -> - free_dead(Stk, Y+1, Instr, [Any|StkAcc]); -free_dead([], _, Instr, StkAcc) -> {Instr,reverse(StkAcc)}. - -enter_cg(#k_var{} = Var, As, Le, Vdb, Bef, St0) -> - {Sis,Int} = cg_setup_call(As++[Var], Bef, Le#l.i, Vdb), - %% Build complete code and final stack/register state. - Arity = length(As), - {Sis ++ [line(Le),{call_fun,Arity},return], - clear_dead(Int#sr{reg=clear_regs(Int#sr.reg)}, Le#l.i, Vdb), - need_stack_frame(St0)}; -enter_cg(#k_remote{mod=Mod,name=Name}, As, Le, Vdb, Bef, St0) - when is_record(Mod, k_var); is_record(Name, k_var) -> - {Sis,Int} = cg_setup_call(As++[Mod,Name], Bef, Le#l.i, Vdb), - %% Build complete code and final stack/register state. - Arity = length(As), - St = need_stack_frame(St0), - {Sis ++ [line(Le),{apply_only,Arity}], - clear_dead(Int#sr{reg=clear_regs(Int#sr.reg)}, Le#l.i, Vdb), - St}; -enter_cg(Func, As, Le, Vdb, Bef, St0) -> - {Sis,Int} = cg_setup_call(As, Bef, Le#l.i, Vdb), - %% Build complete code and final stack/register state. - Arity = length(As), - {Call,St1} = build_enter(Func, Arity, St0), - Line = enter_line(Func, Arity, Le), - {Sis ++ Line ++ Call, - clear_dead(Int#sr{reg=clear_regs(Int#sr.reg)}, Le#l.i, Vdb), - St1}. - -build_enter(#k_remote{mod=#k_atom{val=erlang},name=#k_atom{val='!'}}, 2, St0) -> - {[send,return],need_stack_frame(St0)}; -build_enter(#k_remote{mod=#k_atom{val=Mod},name=#k_atom{val=Name}}, Arity, St0) -> - St1 = case trap_bif(Mod, Name, Arity) of - true -> need_stack_frame(St0); - false -> St0 - end, - {[{call_ext_only,Arity,{extfunc,Mod,Name,Arity}}],St1}; -build_enter(#k_local{name=Name}, Arity, St0) when is_atom(Name) -> - {Lbl,St1} = local_func_label(Name, Arity, St0), - {[{call_only,Arity,{f,Lbl}}],St1}. - -enter_line(#k_remote{mod=#k_atom{val=Mod},name=#k_atom{val=Name}}, Arity, Le) -> - case erl_bifs:is_safe(Mod, Name, Arity) of - false -> - %% Tail-recursive call, possibly to a BIF. - %% We'll need a line instruction in case the - %% BIF call fails. - [line(Le)]; - true -> - %% Call to a safe BIF. Since it cannot fail, - %% we don't need any line instruction here. - [] - end; -enter_line(_, _, _) -> - %% Tail-recursive call to a local function. A line - %% instruction will not be useful. - []. - -%% local_func_label(Name, Arity, State) -> {Label,State'} -%% local_func_label({Name,Arity}, State) -> {Label,State'} -%% Get the function entry label for a local function. - -local_func_label(Name, Arity, St) -> - local_func_label({Name,Arity}, St). - -local_func_label(Key, #cg{functable=Map}=St0) -> - case Map of - #{Key := Label} -> {Label,St0}; - _ -> - {Label,St} = new_label(St0), - {Label,St#cg{functable=Map#{Key => Label}}} - end. - -%% need_stack_frame(State) -> State' -%% Make a note in the state that this function will need a stack frame. - -need_stack_frame(#cg{need_frame=true}=St) -> St; -need_stack_frame(St) -> St#cg{need_frame=true}. - -%% trap_bif(Mod, Name, Arity) -> true|false -%% Trap bifs that need a stack frame. - -trap_bif(erlang, link, 1) -> true; -trap_bif(erlang, unlink, 1) -> true; -trap_bif(erlang, monitor_node, 2) -> true; -trap_bif(erlang, group_leader, 2) -> true; -trap_bif(erlang, exit, 2) -> true; -trap_bif(_, _, _) -> false. - -%% bif_cg(#k_bif{}, Le, Vdb, StackReg, State) -> -%% {[Ainstr],StackReg,State}. -%% Generate code a BIF. - -bif_cg(#k_bif{op=#k_internal{name=Name},args=As,ret=Rs}, Le, Vdb, Bef, St) -> - internal_cg(Name, As, Rs, Le, Vdb, Bef, St); -bif_cg(#k_bif{op=#k_remote{mod=#k_atom{val=erlang},name=#k_atom{val=Name}}, - args=As,ret=Rs}, Le, Vdb, Bef, St) -> - Ar = length(As), - case is_gc_bif(Name, Ar) of - false -> - bif_cg(Name, As, Rs, Le, Vdb, Bef, St); - true -> - gc_bif_cg(Name, As, Rs, Le, Vdb, Bef, St) - end. - -%% internal_cg(Bif, [Arg], [Ret], Le, Vdb, StackReg, State) -> -%% {[Ainstr],StackReg,State}. - -internal_cg(bs_context_to_binary=Instr, [Src0], [], Le, Vdb, Bef, St0) -> - [Src] = cg_reg_args([Src0], Bef), - {[{Instr,Src}],clear_dead(Bef, Le#l.i, Vdb), St0}; -internal_cg(dsetelement, [Index0,Tuple0,New0], _Rs, Le, Vdb, Bef, St0) -> - [New,Tuple,{integer,Index1}] = cg_reg_args([New0,Tuple0,Index0], Bef), - Index = Index1-1, - {[{set_tuple_element,New,Tuple,Index}], - clear_dead(Bef, Le#l.i, Vdb), St0}; -internal_cg(make_fun, [Func0,Arity0|As], Rs, Le, Vdb, Bef, St0) -> - %% This behaves more like a function call. - #k_atom{val=Func} = Func0, - #k_int{val=Arity} = Arity0, - {Sis,Int} = cg_setup_call(As, Bef, Le#l.i, Vdb), - Reg = load_vars(Rs, clear_regs(Int#sr.reg)), - {FuncLbl,St1} = local_func_label(Func, Arity, St0), - MakeFun = {make_fun2,{f,FuncLbl},0,0,length(As)}, - {Sis ++ [MakeFun], - clear_dead(Int#sr{reg=Reg}, Le#l.i, Vdb), - St1}; -internal_cg(bs_init_writable=I, As, Rs, Le, Vdb, Bef, St) -> - %% This behaves like a function call. - {Sis,Int} = cg_setup_call(As, Bef, Le#l.i, Vdb), - Reg = load_vars(Rs, clear_regs(Int#sr.reg)), - {Sis++[I],clear_dead(Int#sr{reg=Reg}, Le#l.i, Vdb),St}; -internal_cg(build_stacktrace=I, As, Rs, Le, Vdb, Bef, St) -> - %% This behaves like a function call. - {Sis,Int} = cg_setup_call(As, Bef, Le#l.i, Vdb), - Reg = load_vars(Rs, clear_regs(Int#sr.reg)), - {Sis++[I],clear_dead(Int#sr{reg=Reg}, Le#l.i, Vdb),St}; -internal_cg(raise, As, Rs, Le, Vdb, Bef, St) -> - %% raise can be treated like a guard BIF. - bif_cg(raise, As, Rs, Le, Vdb, Bef, St); -internal_cg(guard_error, [ExitCall], _Rs, Le, Vdb, Bef, St) -> - %% A call an exit BIF from inside a #k_guard_match{}. - %% Generate a standard call, but leave the register descriptors - %% alone, effectively pretending that there was no call. - #k_call{op=#k_remote{mod=#k_atom{val=Mod},name=#k_atom{val=Name}}, - args=As} = ExitCall, - Arity = length(As), - {Ms,_} = cg_call_args(As, Bef, Le#l.i, Vdb), - Call = {call_ext,Arity,{extfunc,Mod,Name,Arity}}, - Is = Ms++[line(Le),Call], - {Is,Bef,St}; -internal_cg(raw_raise=I, As, Rs, Le, Vdb, Bef, St) -> - %% This behaves like a function call. - {Sis,Int} = cg_setup_call(As, Bef, Le#l.i, Vdb), - Reg = load_vars(Rs, clear_regs(Int#sr.reg)), - {Sis++[I],clear_dead(Int#sr{reg=Reg}, Le#l.i, Vdb),St}. - -%% bif_cg(Bif, [Arg], [Ret], Le, Vdb, StackReg, State) -> -%% {[Ainstr],StackReg,State}. - -bif_cg(Bif, As, [#k_var{name=V}], Le, Vdb, Bef, St0) -> - Ars = cg_reg_args(As, Bef), - - %% If we are inside a catch and in a body (not in guard) and the - %% BIF may fail, we must save everything that will be alive after - %% the catch (because the code after the code assumes that all - %% variables that are live are stored on the stack). - %% - %% Currently, we are somewhat pessimistic in - %% that we save any variable that will be live after this BIF call. - - MayFail = not erl_bifs:is_safe(erlang, Bif, length(As)), - {Sis,Int0} = - case MayFail of - true -> - maybe_adjust_stack(Bef, Le#l.i, Le#l.i+1, Vdb, St0); - false -> - {[],Bef} - end, - Int1 = clear_dead(Int0, Le#l.i, Vdb), - Reg = put_reg(V, Int1#sr.reg), - Int = Int1#sr{reg=Reg}, - Dst = fetch_reg(V, Reg), - BifFail = {f,St0#cg.bfail}, - %% We need a line instructions for BIFs that may fail in a body. - Line = case BifFail of - {f,0} when MayFail -> - [line(Le)]; - _ -> - [] - end, - {Sis++Line++[{bif,Bif,BifFail,Ars,Dst}], - clear_dead(Int, Le#l.i, Vdb), St0}. - - -%% gc_bif_cg(Bif, [Arg], [Ret], Le, Vdb, StackReg, State) -> -%% {[Ainstr],StackReg,State}. - -gc_bif_cg(Bif, As, [#k_var{name=V}], Le, Vdb, Bef, St0) -> - Ars = cg_reg_args(As, Bef), - - %% If we are inside a catch and in a body (not in guard) and the - %% BIF may fail, we must save everything that will be alive after - %% the catch (because the code after the code assumes that all - %% variables that are live are stored on the stack). - %% - %% Currently, we are somewhat pessimistic in - %% that we save any variable that will be live after this BIF call. - - {Sis,Int0} = maybe_adjust_stack(Bef, Le#l.i, Le#l.i+1, Vdb, St0), - - Int1 = clear_dead(Int0, Le#l.i, Vdb), - Reg = put_reg(V, Int1#sr.reg), - Int = Int1#sr{reg=Reg}, - Dst = fetch_reg(V, Reg), - BifFail = {f,St0#cg.bfail}, - Line = case BifFail of - {f,0} -> [line(Le)]; - {f,_} -> [] - end, - {Sis++Line++[{gc_bif,Bif,BifFail,max_reg(Bef#sr.reg),Ars,Dst}], - clear_dead(Int, Le#l.i, Vdb), St0}. - -%% recv_loop_cg(TimeOut, ReceiveVar, ReceiveMatch, TimeOutExprs, -%% [Ret], Le, Vdb, Bef, St) -> {[Ainstr],Aft,St}. - -recv_loop_cg(Te, Rvar, Rm, Tes, Rs, Le, Vdb, Bef, St0) -> - {Sis,Int0} = adjust_stack(Bef, Le#l.i, Le#l.i, Vdb), - Int1 = Int0#sr{reg=clear_regs(Int0#sr.reg)}, - %% Get labels. - {Rl,St1} = new_label(St0), - {Tl,St2} = new_label(St1), - {Bl,St3} = new_label(St2), - St4 = St3#cg{break=Bl,recv=Rl}, %Set correct receive labels - {Ris,Raft,St5} = cg_recv_mesg(Rvar, Rm, Tl, Int1, St4), - {Wis,Taft,St6} = cg_recv_wait(Te, Tes, Le#l.i, Int1, St5), - Int2 = sr_merge(Raft, Taft), %Merge stack/registers - Reg = load_vars(Rs, Int2#sr.reg), - {Sis ++ [line(Le)] ++ Ris ++ [{label,Tl}] ++ Wis ++ [{label,Bl}], - clear_dead(Int2#sr{reg=Reg}, Le#l.i, Vdb), - St6#cg{break=St0#cg.break,recv=St0#cg.recv}}. - -%% cg_recv_mesg( ) -> {[Ainstr],Aft,St}. - -cg_recv_mesg(#k_var{name=R}, Rm, Tl, Bef, St0) -> - Int0 = Bef#sr{reg=put_reg(R, Bef#sr.reg)}, - Ret = fetch_reg(R, Int0#sr.reg), - %% Int1 = clear_dead(Int0, I, Rm#l.vdb), - Int1 = Int0, - {Mis,Int2,St1} = match_cg(Rm, none, Int1, St0), - {[{label,St1#cg.recv},{loop_rec,{f,Tl},Ret}|Mis],Int2,St1}. - -%% cg_recv_wait(Te, Tes, I, Vdb, Int2, St3) -> {[Ainstr],Aft,St}. - -cg_recv_wait(#k_atom{val=infinity}, #cg_block{anno=Le,es=Tes}, I, Bef, St0) -> - %% We know that the 'after' body will never be executed. - %% But to keep the stack and register information up to date, - %% we will generate the code for the 'after' body, and then discard it. - Int1 = clear_dead(Bef, I, Le#l.vdb), - {_,Int2,St1} = cg_block(Tes, Le#l.vdb, - Int1#sr{reg=clear_regs(Int1#sr.reg)}, St0), - {[{wait,{f,St1#cg.recv}}],Int2,St1}; -cg_recv_wait(#k_int{val=0}, #cg_block{anno=Le,es=Tes}, _I, Bef, St0) -> - {Tis,Int,St1} = cg_block(Tes, Le#l.vdb, Bef, St0), - {[timeout|Tis],Int,St1}; -cg_recv_wait(Te, #cg_block{anno=Le,es=Tes}, I, Bef, St0) -> - Reg = cg_reg_arg(Te, Bef), - %% Must have empty registers here! Bug if anything in registers. - Int0 = clear_dead(Bef, I, Le#l.vdb), - {Tis,Int,St1} = cg_block(Tes, Le#l.vdb, - Int0#sr{reg=clear_regs(Int0#sr.reg)}, St0), - {[{wait_timeout,{f,St1#cg.recv},Reg},timeout] ++ Tis,Int,St1}. - -%% recv_next_cg(Le, Vdb, StackReg, St) -> {[Ainstr],StackReg,St}. -%% Use adjust stack to clear stack, but only need it for Aft. - -recv_next_cg(Le, Vdb, Bef, St) -> - {Sis,Aft} = adjust_stack(Bef, Le#l.i, Le#l.i+1, Vdb), - {[{loop_rec_end,{f,St#cg.recv}}] ++ Sis,Aft,St}. %Joke - -%% try_cg(TryBlock, [BodyVar], TryBody, [ExcpVar], TryHandler, [Ret], -%% Le, Vdb, StackReg, St) -> {[Ainstr],StackReg,St}. - -try_cg(Ta, Vs, Tb, Evs, Th, Rs, Le, Vdb, Bef, St0) -> - {B,St1} = new_label(St0), %Body label - {H,St2} = new_label(St1), %Handler label - {E,St3} = new_label(St2), %End label - #l{i=TryTag} = get_kanno(Ta), - Int1 = Bef#sr{stk=put_catch(TryTag, Bef#sr.stk)}, - TryReg = fetch_stack({catch_tag,TryTag}, Int1#sr.stk), - {Ais,Int2,St4} = cg(Ta, Vdb, Int1, St3#cg{break=B,in_catch=true}), - Int3 = Int2#sr{stk=drop_catch(TryTag, Int2#sr.stk)}, - St5 = St4#cg{break=E,in_catch=St3#cg.in_catch}, - {Bis,Baft,St6} = cg(Tb, Vdb, Int3#sr{reg=load_vars(Vs, Int3#sr.reg)}, St5), - {His,Haft,St7} = cg(Th, Vdb, Int3#sr{reg=load_vars(Evs, Int3#sr.reg)}, St6), - Int4 = sr_merge(Baft, Haft), %Merge stack/registers - Aft = Int4#sr{reg=load_vars(Rs, Int4#sr.reg)}, - {[{'try',TryReg,{f,H}}] ++ Ais ++ - [{label,B},{try_end,TryReg}] ++ Bis ++ - [{label,H},{try_case,TryReg}] ++ His ++ - [{label,E}], - clear_dead(Aft, Le#l.i, Vdb), - St7#cg{break=St0#cg.break}}. - -try_enter_cg(Ta, Vs, Tb, Evs, Th, Le, Vdb, Bef, St0) -> - {B,St1} = new_label(St0), %Body label - {H,St2} = new_label(St1), %Handler label - #l{i=TryTag} = get_kanno(Ta), - Int1 = Bef#sr{stk=put_catch(TryTag, Bef#sr.stk)}, - TryReg = fetch_stack({catch_tag,TryTag}, Int1#sr.stk), - {Ais,Int2,St3} = cg(Ta, Vdb, Int1, St2#cg{break=B,in_catch=true}), - Int3 = Int2#sr{stk=drop_catch(TryTag, Int2#sr.stk)}, - St4 = St3#cg{in_catch=St2#cg.in_catch}, - {Bis,Baft,St5} = cg(Tb, Vdb, Int3#sr{reg=load_vars(Vs, Int3#sr.reg)}, St4), - {His,Haft,St6} = cg(Th, Vdb, Int3#sr{reg=load_vars(Evs, Int3#sr.reg)}, St5), - Int4 = sr_merge(Baft, Haft), %Merge stack/registers - Aft = Int4, - {[{'try',TryReg,{f,H}}] ++ Ais ++ - [{label,B},{try_end,TryReg}] ++ Bis ++ - [{label,H},{try_case,TryReg}] ++ His, - clear_dead(Aft, Le#l.i, Vdb), - St6#cg{break=St0#cg.break}}. - -%% catch_cg(CatchBlock, Ret, Le, Vdb, Bef, St) -> {[Ainstr],Aft,St}. - -catch_cg(#cg_block{es=C}, #k_var{name=R}, Le, Vdb, Bef, St0) -> - {B,St1} = new_label(St0), - CatchTag = Le#l.i, - Int1 = Bef#sr{stk=put_catch(CatchTag, Bef#sr.stk)}, - CatchReg = fetch_stack({catch_tag,CatchTag}, Int1#sr.stk), - {Cis,Int2,St2} = cg_block(C, Le#l.vdb, Int1, - St1#cg{break=B,in_catch=true}), - [] = Int2#sr.reg, %Assertion. - Aft = Int2#sr{reg=[{0,R}],stk=drop_catch(CatchTag, Int2#sr.stk)}, - {[{'catch',CatchReg,{f,B}}] ++ Cis ++ - [{label,B},{catch_end,CatchReg}], - clear_dead(Aft, Le#l.i, Vdb), - St2#cg{break=St1#cg.break,in_catch=St1#cg.in_catch}}. - -%% put_cg([Var], Constr, Le, Vdb, Bef, St) -> {[Ainstr],Aft,St}. -%% We have to be careful how a 'put' works. First the structure is -%% built, then it is filled and finally things can be cleared. The -%% annotation must reflect this and make sure that the return -%% variable is allocated first. -%% -%% put_list and put_map are atomic instructions, both of -%% which can safely resuse one of the source registers as target. - -put_cg([#k_var{name=R}], #k_cons{hd=Hd,tl=Tl}, Le, Vdb, Bef, St) -> - [S1,S2] = cg_reg_args([Hd,Tl], Bef), - Int0 = clear_dead(Bef, Le#l.i, Vdb), - Int1 = Int0#sr{reg=put_reg(R, Int0#sr.reg)}, - Ret = fetch_reg(R, Int1#sr.reg), - {[{put_list,S1,S2,Ret}], Int1, St}; -put_cg([#k_var{name=R}], #k_binary{segs=Segs}, Le, Vdb, Bef, - #cg{bfail=Bfail}=St) -> - %% At run-time, binaries are constructed in three stages: - %% 1) First the size of the binary is calculated. - %% 2) Then the binary is allocated. - %% 3) Then each field in the binary is constructed. - %% For simplicity, we use the target register to also hold the - %% size of the binary. Therefore the target register must *not* - %% be one of the source registers. - - %% First allocate the target register. - Int0 = Bef#sr{reg=put_reg(R, Bef#sr.reg)}, - Target = fetch_reg(R, Int0#sr.reg), - - %% Also allocate a scratch register for size calculations. - Temp = find_scratch_reg(Int0#sr.reg), - - %% First generate the code that constructs each field. - Fail = {f,Bfail}, - PutCode = cg_bin_put(Segs, Fail, Bef), - {Sis,Int1} = maybe_adjust_stack(Int0, Le#l.i, Le#l.i+1, Vdb, St), - MaxRegs = max_reg(Bef#sr.reg), - Aft = clear_dead(Int1, Le#l.i, Vdb), - - %% Now generate the complete code for constructing the binary. - Code = cg_binary(PutCode, Target, Temp, Fail, MaxRegs, Le#l.a), - {Sis++Code,Aft,St}; - -%% Map: single variable key. -put_cg([#k_var{name=R}], #k_map{op=Op,var=Map, - es=[#k_map_pair{key=#k_var{}=K,val=V}]}, - Le, Vdb, Bef, St0) -> - {Sis,Int0} = maybe_adjust_stack(Bef, Le#l.i, Le#l.i+1, Vdb, St0), - - SrcReg = cg_reg_arg_prefer_y(Map, Int0), - Line = line(Le#l.a), - - List = [cg_reg_arg(K,Int0),cg_reg_arg(V,Int0)], - - Live = max_reg(Bef#sr.reg), - - %% The target register can reuse one of the source registers. - Aft0 = clear_dead(Int0, Le#l.i, Vdb), - Aft = Aft0#sr{reg=put_reg(R, Aft0#sr.reg)}, - Target = fetch_reg(R, Aft#sr.reg), - - {Is,St1} = put_cg_map(Line, Op, SrcReg, Target, Live, List, St0), - {Sis++Is,Aft,St1}; - -%% Map: (possibly) multiple literal keys. -put_cg([#k_var{name=R}], #k_map{op=Op,var=Map,es=Es}, Le, Vdb, Bef, St0) -> - - %% assert key literals - [] = [Var || #k_map_pair{key=#k_var{}=Var} <- Es], - - {Sis,Int0} = maybe_adjust_stack(Bef, Le#l.i, Le#l.i+1, Vdb, St0), - SrcReg = cg_reg_arg_prefer_y(Map, Int0), - Line = line(Le#l.a), - - %% fetch registers for values to be put into the map - List = flatmap(fun(#k_map_pair{key=K,val=V}) -> - [atomic(K),cg_reg_arg(V, Int0)] - end, Es), - - Live = max_reg(Bef#sr.reg), - - %% The target register can reuse one of the source registers. - Aft0 = clear_dead(Int0, Le#l.i, Vdb), - Aft = Aft0#sr{reg=put_reg(R, Aft0#sr.reg)}, - Target = fetch_reg(R, Aft#sr.reg), - - {Is,St1} = put_cg_map(Line, Op, SrcReg, Target, Live, List, St0), - {Sis++Is,Aft,St1}; - -%% Everything else. -put_cg([#k_var{name=R}], Con, Le, Vdb, Bef, St) -> - %% Find a place for the return register first. - Int = Bef#sr{reg=put_reg(R, Bef#sr.reg)}, - Ret = fetch_reg(R, Int#sr.reg), - Ais = case Con of - #k_tuple{es=Es} -> - [{put_tuple,length(Es),Ret}] ++ cg_build_args(Es, Bef); - Other -> - [{move,cg_reg_arg(Other, Int),Ret}] - end, - {Ais,clear_dead(Int, Le#l.i, Vdb),St}. - - -put_cg_map(Line, Op0, SrcReg, Target, Live, List, St0) -> - Bfail = St0#cg.bfail, - Fail = {f,St0#cg.bfail}, - Op = case Op0 of - assoc -> put_map_assoc; - exact -> put_map_exact - end, - {OkLbl,St1} = new_label(St0), - {BadLbl,St2} = new_label(St1), - Is = if - Bfail =:= 0 orelse Op =:= put_map_assoc -> - [Line,{Op,{f,0},SrcReg,Target,Live,{list,List}}]; - true -> - %% Ensure that Target is always set, even if - %% the map update operation fails. That is necessary - %% because Target may be included in a test_heap - %% instruction. - [Line, - {Op,{f,BadLbl},SrcReg,Target,Live,{list,List}}, - {jump,{f,OkLbl}}, - {label,BadLbl}, - {move,{atom,ok},Target}, - {jump,Fail}, - {label,OkLbl}] - end, - {Is,St2}. - -%%% -%%% Code generation for constructing binaries. -%%% - -cg_binary([{bs_put_binary,Fail,{atom,all},U,_Flags,Src}|PutCode], - Target, Temp, Fail, MaxRegs, Anno) -> - Line = line(Anno), - Live = cg_live(Target, MaxRegs), - SzCode = cg_bitstr_size(PutCode, Target, Temp, Fail, Live), - BinFlags = {field_flags,[]}, - Code = [Line|SzCode] ++ - [case member(single_use, Anno) of - true -> - {bs_private_append,Fail,Target,U,Src,BinFlags,Target}; - false -> - {bs_append,Fail,Target,0,MaxRegs,U,Src,BinFlags,Target} - end] ++ PutCode, - cg_bin_opt(Code); -cg_binary(PutCode, Target, Temp, Fail, MaxRegs, Anno) -> - Line = line(Anno), - Live = cg_live(Target, MaxRegs), - {InitOp,SzCode} = cg_binary_size(PutCode, Target, Temp, Fail, Live), - - Code = [Line|SzCode] ++ [{InitOp,Fail,Target,0,MaxRegs, - {field_flags,[]},Target}|PutCode], - cg_bin_opt(Code). - -cg_live({x,X}, MaxRegs) when X =:= MaxRegs -> MaxRegs+1; -cg_live({x,X}, MaxRegs) when X < MaxRegs -> MaxRegs. - -%% Generate code that calculate the size of the bitstr to be -%% built in BITS. - -cg_bitstr_size(PutCode, Target, Temp, Fail, Live) -> - {Bits,Es} = cg_bitstr_size_1(PutCode, 0, []), - reverse(cg_gen_binsize(Es, Target, Temp, Fail, Live, - [{move,{integer,Bits},Target}])). - -cg_bitstr_size_1([{bs_put_utf8,_,_,Src}|Next], Bits, Acc) -> - cg_bitstr_size_1(Next, Bits, [{'*',{bs_utf8_size,Src},8}|Acc]); -cg_bitstr_size_1([{bs_put_utf16,_,_,Src}|Next], Bits, Acc) -> - cg_bitstr_size_1(Next, Bits, [{'*',{bs_utf16_size,Src},8}|Acc]); -cg_bitstr_size_1([{bs_put_utf32,_,_,_}|Next], Bits, Acc) -> - cg_bitstr_size_1(Next, Bits+32, Acc); -cg_bitstr_size_1([{_,_,S,U,_,Src}|Next], Bits, Acc) -> - case S of - {integer,N} -> cg_bitstr_size_1(Next, Bits+N*U, Acc); - {atom,all} -> cg_bitstr_size_1(Next, Bits, [{bit_size,Src}|Acc]); - _ when U =:= 1 -> cg_bitstr_size_1(Next, Bits, [S|Acc]); - _ -> cg_bitstr_size_1(Next, Bits, [{'*',S,U}|Acc]) - end; -cg_bitstr_size_1([], Bits, Acc) -> {Bits,Acc}. - -%% Generate code that calculate the size of the bitstr to be -%% built in BYTES or BITS (depending on what is easiest). - -cg_binary_size(PutCode, Target, Temp, Fail, Live) -> - {InitInstruction,Szs} = cg_binary_size_1(PutCode, 0, []), - SizeExpr = reverse(cg_gen_binsize(Szs, Target, Temp, Fail, Live, [{move,{integer,0},Target}])), - {InitInstruction,SizeExpr}. - -cg_binary_size_1([{bs_put_utf8,_Fail,_Flags,Src}|T], Bits, Acc) -> - cg_binary_size_1(T, Bits, [{8,{bs_utf8_size,Src}}|Acc]); -cg_binary_size_1([{bs_put_utf16,_Fail,_Flags,Src}|T], Bits, Acc) -> - cg_binary_size_1(T, Bits, [{8,{bs_utf16_size,Src}}|Acc]); -cg_binary_size_1([{bs_put_utf32,_Fail,_Flags,_Src}|T], Bits, Acc) -> - cg_binary_size_1(T, Bits+32, Acc); -cg_binary_size_1([{_Put,_Fail,S,U,_Flags,Src}|T], Bits, Acc) -> - cg_binary_size_2(S, U, Src, T, Bits, Acc); -cg_binary_size_1([], Bits, Acc) -> - Bytes = Bits div 8, - RemBits = Bits rem 8, - Sizes0 = sort([{1,{integer,RemBits}},{8,{integer,Bytes}}|Acc]), - Sizes = filter(fun({_,{integer,0}}) -> false; - (_) -> true end, Sizes0), - case Sizes of - [{1,_}|_] -> - {bs_init_bits,cg_binary_bytes_to_bits(Sizes, [])}; - [{8,_}|_] -> - {bs_init2,[E || {8,E} <- Sizes]}; - [] -> - {bs_init_bits,[]} - end. - -cg_binary_size_2({integer,N}, U, _, Next, Bits, Acc) -> - cg_binary_size_1(Next, Bits+N*U, Acc); -cg_binary_size_2({atom,all}, U, E, Next, Bits, Acc) -> - if - U rem 8 =:= 0 -> - cg_binary_size_1(Next, Bits, [{8,{byte_size,E}}|Acc]); - true -> - cg_binary_size_1(Next, Bits, [{1,{bit_size,E}}|Acc]) - end; -cg_binary_size_2(Reg, 1, _, Next, Bits, Acc) -> - cg_binary_size_1(Next, Bits, [{1,Reg}|Acc]); -cg_binary_size_2(Reg, 8, _, Next, Bits, Acc) -> - cg_binary_size_1(Next, Bits, [{8,Reg}|Acc]); -cg_binary_size_2(Reg, U, _, Next, Bits, Acc) -> - cg_binary_size_1(Next, Bits, [{1,{'*',Reg,U}}|Acc]). - -cg_binary_bytes_to_bits([{8,{integer,N}}|T], Acc) -> - cg_binary_bytes_to_bits(T, [{integer,8*N}|Acc]); -cg_binary_bytes_to_bits([{8,{byte_size,Reg}}|T], Acc) -> - cg_binary_bytes_to_bits(T, [{bit_size,Reg}|Acc]); -cg_binary_bytes_to_bits([{8,Reg}|T], Acc) -> - cg_binary_bytes_to_bits(T, [{'*',Reg,8}|Acc]); -cg_binary_bytes_to_bits([{1,Sz}|T], Acc) -> - cg_binary_bytes_to_bits(T, [Sz|Acc]); -cg_binary_bytes_to_bits([], Acc) -> - cg_binary_bytes_to_bits_1(sort(Acc)). - -cg_binary_bytes_to_bits_1([{integer,I},{integer,J}|T]) -> - cg_binary_bytes_to_bits_1([{integer,I+J}|T]); -cg_binary_bytes_to_bits_1([H|T]) -> - [H|cg_binary_bytes_to_bits_1(T)]; -cg_binary_bytes_to_bits_1([]) -> []. - -cg_gen_binsize([{'*',{bs_utf8_size,Src},B}|T], Target, Temp, Fail, Live, Acc) -> - Size = {bs_utf8_size,Fail,Src,Temp}, - Add = {bs_add,Fail,[Target,Temp,B],Target}, - cg_gen_binsize(T, Target, Temp, Fail, Live, - [Add,Size|Acc]); -cg_gen_binsize([{'*',{bs_utf16_size,Src},B}|T], Target, Temp, Fail, Live, Acc) -> - Size = {bs_utf16_size,Fail,Src,Temp}, - Add = {bs_add,Fail,[Target,Temp,B],Target}, - cg_gen_binsize(T, Target, Temp, Fail, Live, - [Add,Size|Acc]); -cg_gen_binsize([{'*',A,B}|T], Target, Temp, Fail, Live, Acc) -> - cg_gen_binsize(T, Target, Temp, Fail, Live, - [{bs_add,Fail,[Target,A,B],Target}|Acc]); -cg_gen_binsize([{bit_size,B}|T], Target, Temp, Fail, Live, Acc) -> - cg_gen_binsize([Temp|T], Target, Temp, Fail, Live, - [{gc_bif,bit_size,Fail,Live,[B],Temp}|Acc]); -cg_gen_binsize([{byte_size,B}|T], Target, Temp, Fail, Live, Acc) -> - cg_gen_binsize([Temp|T], Target, Temp, Fail, Live, - [{gc_bif,byte_size,Fail,Live,[B],Temp}|Acc]); -cg_gen_binsize([{bs_utf8_size,B}|T], Target, Temp, Fail, Live, Acc) -> - cg_gen_binsize([Temp|T], Target, Temp, Fail, Live, - [{bs_utf8_size,Fail,B,Temp}|Acc]); -cg_gen_binsize([{bs_utf16_size,B}|T], Target, Temp, Fail, Live, Acc) -> - cg_gen_binsize([Temp|T], Target, Temp, Fail, Live, - [{bs_utf16_size,Fail,B,Temp}|Acc]); -cg_gen_binsize([E0|T], Target, Temp, Fail, Live, Acc) -> - cg_gen_binsize(T, Target, Temp, Fail, Live, - [{bs_add,Fail,[Target,E0,1],Target}|Acc]); -cg_gen_binsize([], _, _, _, _, Acc) -> Acc. - - -%% cg_bin_opt(Code0) -> Code -%% Optimize the size calculations for binary construction. - -cg_bin_opt([{move,S1,{x,X}=D},{gc_bif,Op,Fail,Live0,As,Dst}|Is]) -> - Live = if - X + 1 =:= Live0 -> X; - true -> Live0 - end, - [{gc_bif,Op,Fail,Live,As,D}|cg_bin_opt([{move,S1,Dst}|Is])]; -cg_bin_opt([{move,_,_}=I1,{Op,_,_,_}=I2|Is]) - when Op =:= bs_utf8_size orelse Op =:= bs_utf16_size -> - [I2|cg_bin_opt([I1|Is])]; -cg_bin_opt([{bs_add,_,[{integer,0},Src,1],Dst}|Is]) -> - cg_bin_opt_1([{move,Src,Dst}|Is]); -cg_bin_opt([{bs_add,_,[Src,{integer,0},_],Dst}|Is]) -> - cg_bin_opt_1([{move,Src,Dst}|Is]); -cg_bin_opt(Is) -> - cg_bin_opt_1(Is). - -cg_bin_opt_1([{move,Size,D},{bs_append,Fail,D,Extra,Regs,U,Bin,Flags,D}|Is]) -> - [{bs_append,Fail,Size,Extra,Regs,U,Bin,Flags,D}|cg_bin_opt(Is)]; -cg_bin_opt_1([{move,Size,D},{bs_private_append,Fail,D,U,Bin,Flags,D}|Is]) -> - [{bs_private_append,Fail,Size,U,Bin,Flags,D}|cg_bin_opt(Is)]; -cg_bin_opt_1([{move,Size,D},{Op,Fail,D,Extra,Regs,Flags,D}|Is]) - when Op =:= bs_init2; Op =:= bs_init_bits -> - Bytes = case Size of - {integer,Int} -> Int; - _ -> Size - end, - [{Op,Fail,Bytes,Extra,Regs,Flags,D}|cg_bin_opt(Is)]; -cg_bin_opt_1([{move,S1,D},{bs_add,Fail,[D,S2,U],Dst}|Is]) -> - cg_bin_opt([{bs_add,Fail,[S1,S2,U],Dst}|Is]); -cg_bin_opt_1([{move,S1,D},{bs_add,Fail,[S2,D,U],Dst}|Is]) -> - cg_bin_opt([{bs_add,Fail,[S2,S1,U],Dst}|Is]); -cg_bin_opt_1([I|Is]) -> - [I|cg_bin_opt(Is)]; -cg_bin_opt_1([]) -> - []. - -cg_bin_put(#k_bin_seg{size=S0,unit=U,type=T,flags=Fs,seg=E0,next=Next}, - Fail, Bef) -> - S1 = cg_reg_arg(S0, Bef), - E1 = cg_reg_arg(E0, Bef), - {Format,Op} = case T of - integer -> {plain,bs_put_integer}; - utf8 -> {utf,bs_put_utf8}; - utf16 -> {utf,bs_put_utf16}; - utf32 -> {utf,bs_put_utf32}; - binary -> {plain,bs_put_binary}; - float -> {plain,bs_put_float} - end, - case Format of - plain -> - [{Op,Fail,S1,U,{field_flags,Fs},E1}|cg_bin_put(Next, Fail, Bef)]; - utf -> - [{Op,Fail,{field_flags,Fs},E1}|cg_bin_put(Next, Fail, Bef)] - end; -cg_bin_put(#k_bin_end{}, _, _) -> []. - -cg_build_args(As, Bef) -> - [{put,cg_reg_arg(A, Bef)} || A <- As]. - -%% return_cg([Val], Le, Vdb, Bef, St) -> {[Ainstr],Aft,St}. -%% break_cg([Val], Le, Vdb, Bef, St) -> {[Ainstr],Aft,St}. -%% These are very simple, just put return/break values in registers -%% from 0, then return/break. Use the call setup to clean up stack, -%% but must clear registers to ensure sr_merge works correctly. - -return_cg(Rs, Le, Vdb, Bef, St) -> - {Ms,Int} = cg_setup_call(Rs, Bef, Le#l.i, Vdb), - {Ms ++ [return],Int#sr{reg=clear_regs(Int#sr.reg)},St}. - -break_cg(Bs, Le, Vdb, Bef, St) -> - {Ms,Int} = cg_setup_call(Bs, Bef, Le#l.i, Vdb), - {Ms ++ [{jump,{f,St#cg.break}}], - Int#sr{reg=clear_regs(Int#sr.reg)},St}. - -guard_break_cg(Bs, #l{i=I}, Vdb, #sr{reg=Reg0}=Bef, St) -> - #sr{reg=Reg1} = Int = clear_dead(Bef, I, Vdb), - Reg2 = trim_free(Reg1), - NumLocked = length(Reg2), - Moves0 = gen_moves(Bs, Bef, NumLocked, []), - Moves = order_moves(Moves0, find_scratch_reg(Reg0)), - {BreakVars,_} = mapfoldl(fun(_, RegNum) -> - {{RegNum,gbreakvar},RegNum+1} - end, length(Reg2), Bs), - Reg = Reg2 ++ BreakVars, - Aft = Int#sr{reg=Reg}, - {Moves ++ [{jump,{f,St#cg.break}}],Aft,St}. - -%% cg_reg_arg(Arg0, Info) -> Arg -%% cg_reg_args([Arg0], Info) -> [Arg] -%% Convert argument[s] into registers. Literal values are returned unchanged. - -cg_reg_args(As, Bef) -> [cg_reg_arg(A, Bef) || A <- As]. - -cg_reg_arg(#k_var{name=V}, Bef) -> fetch_var(V, Bef); -cg_reg_arg(Literal, _) -> atomic(Literal). - -cg_reg_arg_prefer_y(#k_var{name=V}, Bef) -> fetch_var_prefer_y(V, Bef); -cg_reg_arg_prefer_y(Literal, _) -> atomic(Literal). - -%% cg_setup_call([Arg], Bef, Cur, Vdb) -> {[Instr],Aft}. -%% Do the complete setup for a call/enter. - -cg_setup_call(As, Bef, I, Vdb) -> - {Ms,Int0} = cg_call_args(As, Bef, I, Vdb), - %% Have set up arguments, can now clean up, compress and save to stack. - Int1 = Int0#sr{stk=clear_dead_stk(Int0#sr.stk, I, Vdb),res=[]}, - {Sis,Int2} = adjust_stack(Int1, I, I+1, Vdb), - {Ms ++ Sis,Int2}. - -%% cg_call_args([Arg], SrState) -> {[Instr],SrState}. -%% Setup the arguments to a call/enter/bif. Put the arguments into -%% consecutive registers starting at {x,0} moving any data which -%% needs to be saved. Return a modified SrState structure with the -%% new register contents. N.B. the resultant register info will -%% contain non-variable values when there are non-variable values. -%% -%% This routine is complicated by unsaved values in x registers. -%% We'll move away any unsaved values that are in the registers -%% to be overwritten by the arguments. - -cg_call_args(As, Bef, I, Vdb) -> - Regs0 = load_arg_regs(Bef#sr.reg, As), - Unsaved = unsaved_registers(Regs0, Bef#sr.stk, I, I+1, Vdb), - {UnsavedMoves,Regs} = move_unsaved(Unsaved, Bef#sr.reg, Regs0), - Moves0 = gen_moves(As, Bef), - Moves = order_moves(Moves0, find_scratch_reg(Regs)), - {UnsavedMoves ++ Moves,Bef#sr{reg=Regs}}. - -%% load_arg_regs([Reg], Arguments) -> [Reg] -%% Update the register descriptor to include the arguments (from {x,0} -%% and upwards). Values in argument register are overwritten. -%% Values in x registers above the arguments are preserved. - -load_arg_regs(Regs, As) -> load_arg_regs(Regs, As, 0). - -load_arg_regs([_|Rs], [#k_var{name=V}|As], I) -> [{I,V}|load_arg_regs(Rs, As, I+1)]; -load_arg_regs([_|Rs], [A|As], I) -> [{I,A}|load_arg_regs(Rs, As, I+1)]; -load_arg_regs([], [#k_var{name=V}|As], I) -> [{I,V}|load_arg_regs([], As, I+1)]; -load_arg_regs([], [A|As], I) -> [{I,A}|load_arg_regs([], As, I+1)]; -load_arg_regs(Rs, [], _) -> Rs. - -%% Returns the variables must be saved and are currently in the -%% x registers that are about to be overwritten by the arguments. - -unsaved_registers(Regs, Stk, Fb, Lf, Vdb) -> - [V || {V,F,L} <- Vdb, - F < Fb, - L >= Lf, - not on_stack(V, Stk), - not in_reg(V, Regs)]. - -in_reg(V, Regs) -> keymember(V, 2, Regs). - -%% Move away unsaved variables from the registers that are to be -%% overwritten by the arguments. -move_unsaved(Vs, OrigRegs, NewRegs) -> - move_unsaved(Vs, OrigRegs, NewRegs, []). - -move_unsaved([V|Vs], OrigRegs, NewRegs0, Acc) -> - NewRegs = put_reg(V, NewRegs0), - Src = fetch_reg(V, OrigRegs), - Dst = fetch_reg(V, NewRegs), - move_unsaved(Vs, OrigRegs, NewRegs, [{move,Src,Dst}|Acc]); -move_unsaved([], _, Regs, Acc) -> {Acc,Regs}. - -%% gen_moves(As, Sr) -%% Generate the basic move instruction to move the arguments -%% to their proper registers. The list will be sorted on -%% destinations. (I.e. the move to {x,0} will be first -- -%% see the comment to order_moves/2.) - -gen_moves(As, Sr) -> gen_moves(As, Sr, 0, []). - -gen_moves([#k_var{name=V}|As], Sr, I, Acc) -> - case fetch_var(V, Sr) of - {x,I} -> gen_moves(As, Sr, I+1, Acc); - Reg -> gen_moves(As, Sr, I+1, [{move,Reg,{x,I}}|Acc]) - end; -gen_moves([A0|As], Sr, I, Acc) -> - A = atomic(A0), - gen_moves(As, Sr, I+1, [{move,A,{x,I}}|Acc]); -gen_moves([], _, _, Acc) -> lists:keysort(3, Acc). - -%% order_moves([Move], ScratchReg) -> [Move] -%% Orders move instruction so that source registers are not -%% destroyed before they are used. If there are cycles -%% (such as {move,{x,0},{x,1}}, {move,{x,1},{x,1}}), -%% the scratch register is used to break up the cycle. -%% If possible, the first move of the input list is placed -%% last in the result list (to make the move to {x,0} occur -%% just before the call to allow the Beam loader to coalesce -%% the instructions). - -order_moves(Ms, Scr) -> order_moves(Ms, Scr, []). - -order_moves([{move,_,_}=M|Ms0], ScrReg, Acc0) -> - {Chain,Ms} = collect_chain(Ms0, [M], ScrReg), - Acc = reverse(Chain, Acc0), - order_moves(Ms, ScrReg, Acc); -order_moves([], _, Acc) -> Acc. - -collect_chain(Ms, Path, ScrReg) -> - collect_chain(Ms, Path, [], ScrReg). - -collect_chain([{move,Src,Same}=M|Ms0], [{move,Same,_}|_]=Path, Others, ScrReg) -> - case lists:keyfind(Src, 3, Path) of - false -> - collect_chain(reverse(Others, Ms0), [M|Path], [], ScrReg); - _ -> % We have a cycle. - {break_up_cycle(M, Path, ScrReg),reverse(Others, Ms0)} - end; -collect_chain([M|Ms], Path, Others, ScrReg) -> - collect_chain(Ms, Path, [M|Others], ScrReg); -collect_chain([], Path, Others, _) -> - {Path,Others}. - -break_up_cycle({move,Src,_}=M, Path, ScrReg) -> - [{move,ScrReg,Src},M|break_up_cycle1(Src, Path, ScrReg)]. - -break_up_cycle1(Dst, [{move,Src,Dst}|Path], ScrReg) -> - [{move,Src,ScrReg}|Path]; -break_up_cycle1(Dst, [M|Path], LastMove) -> - [M|break_up_cycle1(Dst, Path, LastMove)]. - -%% clear_dead(Sr, Until, Vdb) -> Aft. -%% Remove all variables in Sr which have died AT ALL so far. - -clear_dead(#sr{stk=Stk}=Sr0, Until, Vdb) -> - Sr = Sr0#sr{reg=clear_dead_reg(Sr0, Until, Vdb), - stk=clear_dead_stk(Stk, Until, Vdb)}, - reserve(Sr). - -clear_dead_reg(Sr, Until, Vdb) -> - [case R of - {_I,V} = IV -> - case vdb_find(V, Vdb) of - {V,_,L} when L > Until -> IV; - _ -> free %Remove anything else - end; - {reserved,_I,_V}=Reserved -> Reserved; - free -> free - end || R <- Sr#sr.reg]. - -clear_dead_stk(Stk, Until, Vdb) -> - [case S of - {V} = T -> - case vdb_find(V, Vdb) of - {V,_,L} when L > Until -> T; - _ -> dead %Remove anything else - end; - free -> free; - dead -> dead - end || S <- Stk]. - - -%% sr_merge(Sr1, Sr2) -> Sr. -%% Merge two stack/register states keeping the longest of both stack -%% and register. Perform consistency check on both, elements must be -%% the same. Allow frame size 'void' to make easy creation of -%% "empty" frame. - -sr_merge(#sr{reg=R1,stk=S1,res=[]}, #sr{reg=R2,stk=S2,res=[]}) -> - #sr{reg=longest(R1, R2),stk=longest(S1, S2),res=[]}; -sr_merge(void, S2) -> S2#sr{res=[]}. - -longest([H|T1], [H|T2]) -> [H|longest(T1, T2)]; -longest([dead|T1], [free|T2]) -> [dead|longest(T1, T2)]; -longest([free|T1], [dead|T2]) -> [dead|longest(T1, T2)]; -longest([dead|_] = L, []) -> L; -longest([], [dead|_] = L) -> L; -longest([free|_] = L, []) -> L; -longest([], [free|_] = L) -> L; -longest([], []) -> []. - -trim_free([R|Rs0]) -> - case {trim_free(Rs0),R} of - {[],free} -> []; - {Rs,R} -> [R|Rs] - end; -trim_free([]) -> []. - -%% maybe_adjust_stack(Bef, FirstBefore, LastFrom, Vdb, St) -> {[Ainstr],Aft}. -%% Adjust the stack, but only if the code is inside a catch and not -%% inside a guard. Use this funtion before instructions that may -%% cause an exception. - -maybe_adjust_stack(Bef, Fb, Lf, Vdb, St) -> - case St of - #cg{in_catch=true,bfail=0} -> - adjust_stack(Bef, Fb, Lf, Vdb); - #cg{} -> - {[],Bef} - end. - -%% adjust_stack(Bef, FirstBefore, LastFrom, Vdb) -> {[Ainstr],Aft}. -%% Do complete stack adjustment by compressing stack and adding -%% variables to be saved. Try to optimise ordering on stack by -%% having reverse order to their lifetimes. -%% -%% In Beam, there is a fixed stack frame and no need to do stack compression. - -adjust_stack(Bef, Fb, Lf, Vdb) -> - Stk0 = Bef#sr.stk, - {Stk1,Saves} = save_stack(Stk0, Fb, Lf, Vdb), - {saves(Saves, Bef#sr.reg, Stk1), - Bef#sr{stk=Stk1}}. - -%% save_stack(Stack, FirstBefore, LastFrom, Vdb) -> {[SaveVar],NewStack}. -%% Save variables which are used past current point and which are not -%% already on the stack. - -save_stack(Stk0, Fb, Lf, Vdb) -> - %% New variables that are in use but not on stack. - New = new_not_on_stack(Stk0, Fb, Lf, Vdb), - - %% Add new variables that are not just dropped immediately. - %% N.B. foldr works backwards from the end!! - Saves = [V || {V,_,_} <- keysort(3, New)], - Stk1 = foldr(fun (V, Stk) -> put_stack(V, Stk) end, Stk0, Saves), - {Stk1,Saves}. - -%% new_not_on_stack(Stack, FirstBefore, LastFrom, Vdb) -> -%% [{Variable,First,Last}] -%% Return information about all variables that are used past current -%% point and that are not already on the stack. - -new_not_on_stack(Stk, Fb, Lf, Vdb) -> - [VFL || {V,F,L} = VFL <- Vdb, - F < Fb, - L >= Lf, - not on_stack(V, Stk)]. - -%% saves([SaveVar], Reg, Stk) -> [{move,Reg,Stk}]. -%% Generate move instructions to save variables onto stack. The -%% stack/reg info used is that after the new stack has been made. - -saves(Ss, Reg, Stk) -> - [{move,fetch_reg(V, Reg),fetch_stack(V, Stk)} || V <- Ss]. - -%% fetch_var(VarName, StkReg) -> r{R} | sp{Sp}. -%% find_var(VarName, StkReg) -> ok{r{R} | sp{Sp}} | error. -%% Fetch/find a variable in either the registers or on the -%% stack. Fetch KNOWS it's there. - -fetch_var(V, Sr) -> - case find_reg(V, Sr#sr.reg) of - {ok,R} -> R; - error -> fetch_stack(V, Sr#sr.stk) - end. - -fetch_var_prefer_y(V, #sr{reg=Reg,stk=Stk}) -> - case find_stack(V, Stk) of - {ok,R} -> R; - error -> fetch_reg(V, Reg) - end. - -load_vars(Vs, Regs) -> - foldl(fun (#k_var{name=V}, Rs) -> put_reg(V, Rs) end, Regs, Vs). - -%% put_reg(Val, Regs) -> Regs. -%% find_reg(Val, Regs) -> {ok,r{R}} | error. -%% fetch_reg(Val, Regs) -> r{R}. -%% Functions to interface the registers. - -% put_regs(Vs, Rs) -> foldl(fun put_reg/2, Rs, Vs). - -put_reg(V, Rs) -> put_reg_1(V, Rs, 0). - -put_reg_1(V, [free|Rs], I) -> [{I,V}|Rs]; -put_reg_1(V, [{reserved,I,V}|Rs], I) -> [{I,V}|Rs]; -put_reg_1(V, [R|Rs], I) -> [R|put_reg_1(V, Rs, I+1)]; -put_reg_1(V, [], I) -> [{I,V}]. - -fetch_reg(V, [{I,V}|_]) -> {x,I}; -fetch_reg(V, [_|SRs]) -> fetch_reg(V, SRs). - -find_reg(V, [{I,V}|_]) -> {ok,{x,I}}; -find_reg(V, [_|SRs]) -> find_reg(V, SRs); -find_reg(_, []) -> error. - -%% For the bit syntax, we need a scratch register if we are constructing -%% a binary that will not be used. - -find_scratch_reg(Rs) -> find_scratch_reg(Rs, 0). - -find_scratch_reg([free|_], I) -> {x,I}; -find_scratch_reg([_|Rs], I) -> find_scratch_reg(Rs, I+1); -find_scratch_reg([], I) -> {x,I}. - -replace_reg_contents(Old, New, [{I,Old}|Rs]) -> [{I,New}|Rs]; -replace_reg_contents(Old, New, [R|Rs]) -> [R|replace_reg_contents(Old, New, Rs)]. - -%%clear_regs(Regs) -> map(fun (R) -> free end, Regs). -clear_regs(_) -> []. - -max_reg(Regs) -> - foldl(fun ({I,_}, _) -> I; - (_, Max) -> Max end, - -1, Regs) + 1. - -%% put_stack(Val, [{Val}]) -> [{Val}]. -%% fetch_stack(Var, Stk) -> sp{S}. -%% find_stack(Var, Stk) -> ok{sp{S}} | error. -%% Functions to interface the stack. - -put_stack(Val, []) -> [{Val}]; -put_stack(Val, [dead|Stk]) -> [{Val}|Stk]; -put_stack(Val, [free|Stk]) -> [{Val}|Stk]; -put_stack(Val, [NotFree|Stk]) -> [NotFree|put_stack(Val, Stk)]. - -put_stack_carefully(Val, Stk0) -> - try - put_stack_carefully1(Val, Stk0) - catch - throw:error -> - error - end. - -put_stack_carefully1(_, []) -> throw(error); -put_stack_carefully1(Val, [dead|Stk]) -> [{Val}|Stk]; -put_stack_carefully1(Val, [free|Stk]) -> [{Val}|Stk]; -put_stack_carefully1(Val, [NotFree|Stk]) -> - [NotFree|put_stack_carefully1(Val, Stk)]. - -fetch_stack(Var, Stk) -> fetch_stack(Var, Stk, 0). - -fetch_stack(V, [{V}|_], I) -> {yy,I}; -fetch_stack(V, [_|Stk], I) -> fetch_stack(V, Stk, I+1). - -find_stack(Var, Stk) -> find_stack(Var, Stk, 0). - -find_stack(V, [{V}|_], I) -> {ok,{yy,I}}; -find_stack(V, [_|Stk], I) -> find_stack(V, Stk, I+1); -find_stack(_, [], _) -> error. - -on_stack(V, Stk) -> keymember(V, 1, Stk). - -%% put_catch(CatchTag, Stack) -> Stack' -%% drop_catch(CatchTag, Stack) -> Stack' -%% Special interface for putting and removing catch tags, to ensure that -%% catches nest properly. Also used for try tags. - -put_catch(Tag, Stk0) -> put_catch(Tag, reverse(Stk0), []). - -put_catch(Tag, [], Stk) -> - put_stack({catch_tag,Tag}, Stk); -put_catch(Tag, [{{catch_tag,_}}|_]=RevStk, Stk) -> - reverse(RevStk, put_stack({catch_tag,Tag}, Stk)); -put_catch(Tag, [Other|Stk], Acc) -> - put_catch(Tag, Stk, [Other|Acc]). - -drop_catch(Tag, [{{catch_tag,Tag}}|Stk]) -> [free|Stk]; -drop_catch(Tag, [Other|Stk]) -> [Other|drop_catch(Tag, Stk)]. - -%% atomic(Klit) -> Lit. -%% atomic_list([Klit]) -> [Lit]. - -atomic(#k_literal{val=V}) -> {literal,V}; -atomic(#k_int{val=I}) -> {integer,I}; -atomic(#k_float{val=F}) -> {float,F}; -atomic(#k_atom{val=A}) -> {atom,A}; -%%atomic(#k_char{val=C}) -> {char,C}; -atomic(#k_nil{}) -> nil. - -%% new_label(St) -> {L,St}. - -new_label(#cg{lcount=Next}=St) -> - {Next,St#cg{lcount=Next+1}}. - -%% line(Le) -> {line,[] | {location,File,Line}} -%% Create a line instruction, containing information about -%% the current filename and line number. A line information -%% instruction should be placed before any operation that could -%% cause an exception. - -line(#l{a=Anno}) -> - line(Anno); -line([Line,{file,Name}]) when is_integer(Line) -> - line_1(Name, Line); -line([_|_]=A) -> - {Name,Line} = find_loc(A, no_file, 0), - line_1(Name, Line); -line([]) -> - {line,[]}. - -line_1(no_file, _) -> - {line,[]}; -line_1(_, 0) -> - %% Missing line number or line number 0. - {line,[]}; -line_1(Name, Line) -> - {line,[{location,Name,Line}]}. - -find_loc([Line|T], File, _) when is_integer(Line) -> - find_loc(T, File, Line); -find_loc([{file,File}|T], _, Line) -> - find_loc(T, File, Line); -find_loc([_|T], File, Line) -> - find_loc(T, File, Line); -find_loc([], File, Line) -> {File,Line}. - -flatmapfoldl(F, Accu0, [Hd|Tail]) -> - {R,Accu1} = F(Hd, Accu0), - {Rs,Accu2} = flatmapfoldl(F, Accu1, Tail), - {R++Rs,Accu2}; -flatmapfoldl(_, Accu, []) -> {[],Accu}. - -%% Keep track of life time for variables. -%% -%% init_vars([{var,VarName}]) -> Vdb. -%% new_vars([VarName], I, Vdb) -> Vdb. -%% use_vars([VarName], I, Vdb) -> Vdb. -%% add_var(VarName, F, L, Vdb) -> Vdb. -%% -%% The list of variable names for new_vars/3 and use_vars/3 -%% must be sorted. - -init_vars(Vs) -> - vdb_new(Vs). - -new_vars([], _, Vdb) -> Vdb; -new_vars([V], I, Vdb) -> vdb_store_new(V, {V,I,I}, Vdb); -new_vars(Vs, I, Vdb) -> vdb_update_vars(Vs, Vdb, I). - -use_vars([], _, Vdb) -> - Vdb; -use_vars([V], I, Vdb) -> - case vdb_find(V, Vdb) of - {V,F,L} when I > L -> vdb_update(V, {V,F,I}, Vdb); - {V,_,_} -> Vdb; - error -> vdb_store_new(V, {V,I,I}, Vdb) - end; -use_vars(Vs, I, Vdb) -> vdb_update_vars(Vs, Vdb, I). - -add_var(V, F, L, Vdb) -> - vdb_store_new(V, {V,F,L}, Vdb). - -%% vdb - -vdb_new(Vs) -> - ordsets:from_list([{V,0,0} || #k_var{name=V} <- Vs]). - --type var() :: atom(). - --spec vdb_find(var(), [vdb_entry()]) -> 'error' | vdb_entry(). - -vdb_find(V, Vdb) -> - case lists:keyfind(V, 1, Vdb) of - false -> error; - Vd -> Vd - end. - -vdb_update(V, Update, [{V,_,_}|Vdb]) -> - [Update|Vdb]; -vdb_update(V, Update, [Vd|Vdb]) -> - [Vd|vdb_update(V, Update, Vdb)]. - -vdb_store_new(V, New, [{V1,_,_}=Vd|Vdb]) when V > V1 -> - [Vd|vdb_store_new(V, New, Vdb)]; -vdb_store_new(V, New, [{V1,_,_}|_]=Vdb) when V < V1 -> - [New|Vdb]; -vdb_store_new(_, New, []) -> [New]. - -vdb_update_vars([V|_]=Vs, [{V1,_,_}=Vd|Vdb], I) when V > V1 -> - [Vd|vdb_update_vars(Vs, Vdb, I)]; -vdb_update_vars([V|Vs], [{V1,_,_}|_]=Vdb, I) when V < V1 -> - %% New variable. - [{V,I,I}|vdb_update_vars(Vs, Vdb, I)]; -vdb_update_vars([V|Vs], [{_,F,L}=Vd|Vdb], I) -> - %% Existing variable. - if - I > L -> [{V,F,I}|vdb_update_vars(Vs, Vdb, I)]; - true -> [Vd|vdb_update_vars(Vs, Vdb, I)] - end; -vdb_update_vars([V|Vs], [], I) -> - %% New variable. - [{V,I,I}|vdb_update_vars(Vs, [], I)]; -vdb_update_vars([], Vdb, _) -> Vdb. - -%% vdb_sub(Min, Max, Vdb) -> Vdb. -%% Extract variables which are used before and after Min. Lock -%% variables alive after Max. - -vdb_sub(Min, Max, Vdb) -> - [ if L >= Max -> {V,F,locked}; - true -> Vd - end || {V,F,L}=Vd <- Vdb, - F < Min, - L >= Min ]. -- cgit v1.2.3