diff options
Diffstat (limited to 'lib/compiler/src')
-rw-r--r-- | lib/compiler/src/Makefile | 1 | ||||
-rw-r--r-- | lib/compiler/src/beam_a.erl | 3 | ||||
-rw-r--r-- | lib/compiler/src/beam_block.erl | 3 | ||||
-rw-r--r-- | lib/compiler/src/beam_clean.erl | 18 | ||||
-rw-r--r-- | lib/compiler/src/beam_except.erl | 96 | ||||
-rw-r--r-- | lib/compiler/src/beam_flatten.erl | 72 | ||||
-rw-r--r-- | lib/compiler/src/beam_jump.erl | 84 | ||||
-rw-r--r-- | lib/compiler/src/beam_listing.erl | 2 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa.erl | 19 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_bsm.erl | 22 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_codegen.erl | 175 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_dead.erl | 59 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_funs.erl | 149 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_pre_codegen.erl | 116 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_type.erl | 103 | ||||
-rw-r--r-- | lib/compiler/src/beam_trim.erl | 316 | ||||
-rw-r--r-- | lib/compiler/src/beam_utils.erl | 610 | ||||
-rw-r--r-- | lib/compiler/src/beam_validator.erl | 9 | ||||
-rw-r--r-- | lib/compiler/src/compile.erl | 60 | ||||
-rw-r--r-- | lib/compiler/src/compiler.app.src | 1 | ||||
-rw-r--r-- | lib/compiler/src/erl_bifs.erl | 1 |
21 files changed, 969 insertions, 950 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile index 26ae6566e6..d475e5a19a 100644 --- a/lib/compiler/src/Makefile +++ b/lib/compiler/src/Makefile @@ -63,6 +63,7 @@ MODULES = \ beam_ssa_bsm \ beam_ssa_codegen \ beam_ssa_dead \ + beam_ssa_funs \ beam_ssa_lint \ beam_ssa_opt \ beam_ssa_pp \ diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl index 0abc845310..dd2537a699 100644 --- a/lib/compiler/src/beam_a.erl +++ b/lib/compiler/src/beam_a.erl @@ -59,6 +59,9 @@ rename_instrs([{test,is_eq_exact,_,[Dst,Src]}=Test, rename_instrs([{test,is_eq_exact,_,[Same,Same]}|Is]) -> %% Same literal or same register. Will always succeed. rename_instrs(Is); +rename_instrs([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_},{label,Fail}|Is]) -> + %% This instruction sequence does nothing. + rename_instrs(Is); rename_instrs([{apply_last,A,N}|Is]) -> [{apply,A},{deallocate,N},return|rename_instrs(Is)]; rename_instrs([{call_last,A,F,N}|Is]) -> diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index d28c0fd9e4..9d8d5b2b0c 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -49,9 +49,6 @@ function({function,Name,Arity,CLabel,Is0}) -> blockify(Is) -> blockify(Is, []). -blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) -> - %% Useless instruction sequence. - blockify(Is, Acc); blockify([I|Is0]=IsAll, Acc) -> case collect(I) of error -> blockify(Is0, [I|Acc]); diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl index f5f0ac2218..7299654476 100644 --- a/lib/compiler/src/beam_clean.erl +++ b/lib/compiler/src/beam_clean.erl @@ -23,17 +23,15 @@ -export([module/2]). -export([clean_labels/1]). --import(lists, [foldl/3]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. module({Mod,Exp,Attr,Fs0,_}, Opts) -> Order = [Lbl || {function,_,_,Lbl,_} <- Fs0], - All = foldl(fun({function,_,_,Lbl,_}=Func,D) -> dict:store(Lbl, Func, D) end, - dict:new(), Fs0), + All = maps:from_list([{Lbl,Func} || {function,_,_,Lbl,_}=Func <- Fs0]), WorkList = rootset(Fs0, Exp, Attr), - Used = find_all_used(WorkList, All, sets:from_list(WorkList)), + Used = find_all_used(WorkList, All, cerl_sets:from_list(WorkList)), Fs1 = remove_unused(Order, Used, All), {Fs2,Lc} = clean_labels(Fs1), Fs = maybe_remove_lines(Fs2, Opts), @@ -55,16 +53,16 @@ rootset(Fs, Root0, Attr) -> %% Remove the unused functions. remove_unused([F|Fs], Used, All) -> - case sets:is_element(F, Used) of + case cerl_sets:is_element(F, Used) of false -> remove_unused(Fs, Used, All); - true -> [dict:fetch(F, All)|remove_unused(Fs, Used, All)] + true -> [map_get(F, All)|remove_unused(Fs, Used, All)] end; remove_unused([], _, _) -> []. - + %% Find all used functions. find_all_used([F|Fs0], All, Used0) -> - {function,_,_,_,Code} = dict:fetch(F, All), + {function,_,_,_,Code} = map_get(F, All), {Fs,Used} = update_work_list(Code, {Fs0,Used0}), find_all_used(Fs, All, Used); find_all_used([], _All, Used) -> Used. @@ -78,9 +76,9 @@ update_work_list([_|Is], Sets) -> update_work_list([], Sets) -> Sets. add_to_work_list(F, {Fs,Used}=Sets) -> - case sets:is_element(F, Used) of + case cerl_sets:is_element(F, Used) of true -> Sets; - false -> {[F|Fs],sets:add_element(F, Used)} + false -> {[F|Fs],cerl_sets:add_element(F, Used)} end. diff --git a/lib/compiler/src/beam_except.erl b/lib/compiler/src/beam_except.erl index 98831d87a7..49bfb5606f 100644 --- a/lib/compiler/src/beam_except.erl +++ b/lib/compiler/src/beam_except.erl @@ -31,7 +31,7 @@ %%% erlang:error(function_clause, Args) => jump FuncInfoLabel %%% --import(lists, [reverse/1,seq/2]). +-import(lists, [reverse/1,seq/2,splitwith/2]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. @@ -74,13 +74,13 @@ translate([I|Is], St, Acc) -> translate([], _, Acc) -> reverse(Acc). -translate_1(Ar, I, Is, St, [{line,_}=Line|Acc1]=Acc0) -> - case dig_out(Ar, Acc1) of +translate_1(Ar, I, Is, #st{arity=Arity}=St, [{line,_}=Line|Acc1]=Acc0) -> + case dig_out(Ar, Arity, Acc1) of no -> translate(Is, St, [I|Acc0]); - {yes,{function_clause,Arity},Acc2} -> + {yes,function_clause,Acc2} -> case {Line,St} of - {{line,Loc},#st{lbl=Fi,loc=Loc,arity=Arity}} -> + {{line,Loc},#st{lbl=Fi,loc=Loc}} -> Instr = {jump,{f,Fi}}, translate(Is, St, [Instr|Acc2]); {_,_} -> @@ -92,9 +92,13 @@ translate_1(Ar, I, Is, St, [{line,_}=Line|Acc1]=Acc0) -> translate(Is, St, [Instr,Line|Acc2]) end. -dig_out(Ar, [{kill,_}|Is]) -> - dig_out(Ar, Is); -dig_out(1, [{block,Bl0}|Is]) -> +dig_out(1, _Arity, Is) -> + dig_out(Is); +dig_out(2, Arity, Is) -> + dig_out_fc(Arity, Is); +dig_out(_, _, _) -> no. + +dig_out([{block,Bl0}|Is]) -> case dig_out_block(reverse(Bl0)) of no -> no; {yes,What,[]} -> @@ -102,12 +106,7 @@ dig_out(1, [{block,Bl0}|Is]) -> {yes,What,Bl} -> {yes,What,[{block,Bl}|Is]} end; -dig_out(2, [{block,Bl}|Is]) -> - case dig_out_block_fc(Bl) of - no -> no; - {yes,What} -> {yes,What,Is} - end; -dig_out(_, _) -> no. +dig_out(_) -> no. dig_out_block([{set,[{x,0}],[{atom,if_clause}],move}]) -> {yes,if_end,[]}; @@ -141,33 +140,64 @@ fix_block_1([{set,[],[],{alloc,Live,{F1,F2,Needed0,F3}}}|Is], Words) -> fix_block_1([I|Is], Words) -> [I|fix_block_1(Is, Words)]. -dig_out_block_fc([{set,[],[],{alloc,Live,_}}|Bl]) -> - Regs = maps:from_list([{{x,X},{arg,X}} || X <- seq(0, Live-1)]), - dig_out_fc(Bl, Regs); -dig_out_block_fc(_) -> no. -dig_out_fc([{set,[Dst],[Hd,Tl],put_list}|Is], Regs0) -> +dig_out_fc(Arity, Is0) -> + Regs0 = maps:from_list([{{x,X},{arg,X}} || X <- seq(0, Arity-1)]), + {Is,Acc0} = splitwith(fun({label,_}) -> false; + ({test,_,_,_}) -> false; + (_) -> true + end, Is0), + {Regs,Acc} = dig_out_fc_1(reverse(Is), Regs0, Acc0), + case is_fc(Arity, Regs) of + true -> + {yes,function_clause,Acc}; + false -> + no + end. + +dig_out_fc_1([{block,Bl}|Is], Regs0, Acc) -> + Regs = dig_out_fc_block(Bl, Regs0), + dig_out_fc_1(Is, Regs, Acc); +dig_out_fc_1([{bs_set_position,_,_}=I|Is], Regs, Acc) -> + dig_out_fc_1(Is, Regs, [I|Acc]); +dig_out_fc_1([{bs_get_tail,_,_,Live}=I|Is], Regs0, Acc) -> + Regs = prune_xregs(Live, Regs0), + dig_out_fc_1(Is, Regs, [I|Acc]); +dig_out_fc_1([_|_], _Regs, _Acc) -> + {#{},[]}; +dig_out_fc_1([], Regs, Acc) -> + {Regs,Acc}. + +dig_out_fc_block([{set,[],[],{alloc,Live,_}}|Is], Regs0) -> + Regs = prune_xregs(Live, Regs0), + dig_out_fc_block(Is, Regs); +dig_out_fc_block([{set,[Dst],[Hd,Tl],put_list}|Is], Regs0) -> Regs = Regs0#{Dst=>{cons,get_reg(Hd, Regs0),get_reg(Tl, Regs0)}}, - dig_out_fc(Is, Regs); -dig_out_fc([{set,[Dst],[Src],move}|Is], Regs0) -> + dig_out_fc_block(Is, Regs); +dig_out_fc_block([{set,[Dst],[Src],move}|Is], Regs0) -> Regs = Regs0#{Dst=>get_reg(Src, Regs0)}, - dig_out_fc(Is, Regs); -dig_out_fc([{set,_,_,_}|_], _Regs) -> - %% Unknown instruction. It is not a function_clause error. - no; -dig_out_fc([], Regs) -> + dig_out_fc_block(Is, Regs); +dig_out_fc_block([{set,_,_,_}|_], _Regs) -> + %% Unknown instruction. Fail. + #{}; +dig_out_fc_block([], Regs) -> Regs. + +prune_xregs(Live, Regs) -> + maps:filter(fun({x,X}, _) -> X < Live end, Regs). + +is_fc(Arity, Regs) -> case Regs of #{{x,0}:={atom,function_clause},{x,1}:=Args} -> - dig_out_fc_1(Args, 0); + is_fc_1(Args, 0) =:= Arity; #{} -> - no + false end. -dig_out_fc_1({cons,{arg,I},T}, I) -> - dig_out_fc_1(T, I+1); -dig_out_fc_1(nil, I) -> - {yes,{function_clause,I}}; -dig_out_fc_1(_, _) -> no. +is_fc_1({cons,{arg,I},T}, I) -> + is_fc_1(T, I+1); +is_fc_1(nil, I) -> + I; +is_fc_1(_, _) -> -1. get_reg(R, Regs) -> case Regs of diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl index 973d16a1bc..3e6bc1b1ed 100644 --- a/lib/compiler/src/beam_flatten.erl +++ b/lib/compiler/src/beam_flatten.erl @@ -32,8 +32,7 @@ module({Mod,Exp,Attr,Fs,Lc}, _Opt) -> {ok,{Mod,Exp,Attr,[function(F) || F <- Fs],Lc}}. function({function,Name,Arity,CLabel,Is0}) -> - Is1 = block(Is0), - Is = opt(Is1), + Is = block(Is0), {function,Name,Arity,CLabel,Is}. block(Is) -> @@ -43,21 +42,12 @@ block([{block,Is0}|Is1], Acc) -> block(Is1, norm_block(Is0, Acc)); block([I|Is], Acc) -> block(Is, [I|Acc]); block([], Acc) -> reverse(Acc). -norm_block([{set,[],[],{alloc,R,{_,nostack,_,_}=Alloc}}|Is], Acc0) -> - case insert_alloc_in_bs_init(Acc0, Alloc) of - impossible -> - norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0)); - Acc -> - norm_block(Is, Acc) - end; norm_block([{set,[],[],{alloc,R,Alloc}}|Is], Acc0) -> norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0)); -norm_block([{set,[D1],[S],get_hd},{set,[D2],[S],get_tl}|Is], Acc) -> - I = {get_list,S,D1,D2}, - norm_block(Is, [I|Acc]); -norm_block([I|Is], Acc) -> norm_block(Is, [norm(I)|Acc]); +norm_block([I|Is], Acc) -> + norm_block(Is, [norm(I)|Acc]); norm_block([], Acc) -> Acc. - + norm({set,[D],As,{bif,N,F}}) -> {bif,N,F,As,D}; norm({set,[D],As,{alloc,R,{gc_bif,N,F}}}) -> {gc_bif,N,F,R,As,D}; norm({set,[D],[],init}) -> {init,D}; @@ -91,57 +81,3 @@ norm_allocate({nozero,Ns,0,Inits}, Regs) -> [{allocate,Ns,Regs}|Inits]; norm_allocate({nozero,Ns,Nh,Inits}, Regs) -> [{allocate_heap,Ns,Nh,Regs}|Inits]. - -%% insert_alloc_in_bs_init(ReverseInstructionStream, AllocationInfo) -> -%% impossible | ReverseInstructionStream' -%% A bs_init/6 instruction should not be followed by a test heap instruction. -%% Given the AllocationInfo from a test heap instruction, merge the -%% allocation amounts into the previous bs_init/6 instruction (if any). -%% -insert_alloc_in_bs_init([{bs_put,_,_,_}=I|Is], Alloc) -> - %% The instruction sequence ends with an bs_put/4 instruction. - %% We'll need to search backwards for the bs_init/6 instruction. - insert_alloc_1(Is, Alloc, [I]); -insert_alloc_in_bs_init(_, _) -> impossible. - -insert_alloc_1([{bs_init=Op,Fail,Info0,Live,Ss,Dst}|Is], - {_,nostack,Ws2,[]}, Acc) when is_integer(Live) -> - %% The number of extra heap words is always in the second position - %% in the Info tuple. - Ws1 = element(2, Info0), - Al = beam_utils:combine_heap_needs(Ws1, Ws2), - Info = setelement(2, Info0, Al), - I = {Op,Fail,Info,Live,Ss,Dst}, - reverse(Acc, [I|Is]); -insert_alloc_1([{bs_put,_,_,_}=I|Is], Alloc, Acc) -> - insert_alloc_1(Is, Alloc, [I|Acc]). - -%% opt(Is0) -> Is -%% Simple peep-hole optimization to move a {move,Any,{x,0}} past -%% any kill up to the next call instruction. (To give the loader -%% an opportunity to combine the 'move' and the 'call' instructions.) -%% -opt(Is) -> - opt_1(Is, []). - -opt_1([{move,_,{x,0}}=I|Is0], Acc0) -> - case move_past_kill(Is0, I, Acc0) of - impossible -> opt_1(Is0, [I|Acc0]); - {Is,Acc} -> opt_1(Is, Acc) - end; -opt_1([I|Is], Acc) -> - opt_1(Is, [I|Acc]); -opt_1([], Acc) -> reverse(Acc). - -move_past_kill([{kill,Src}|_], {move,Src,_}, _) -> - impossible; -move_past_kill([{kill,_}=I|Is], Move, Acc) -> - move_past_kill(Is, Move, [I|Acc]); -move_past_kill([{trim,N,_}=I|Is], {move,Src,Dst}=Move, Acc) -> - case Src of - {y,Y} when Y < N-> impossible; - {y,Y} -> {Is,[{move,{y,Y-N},Dst},I|Acc]}; - _ -> {Is,[Move,I|Acc]} - end; -move_past_kill(Is, Move, Acc) -> - {Is,[Move|Acc]}. diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl index fbff4cfd79..d3a618d211 100644 --- a/lib/compiler/src/beam_jump.erl +++ b/lib/compiler/src/beam_jump.erl @@ -144,13 +144,19 @@ module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) -> %% %% NOTE: This function assumes that there are no labels inside blocks. function({function,Name,Arity,CLabel,Asm0}, Lc0) -> - Asm1 = eliminate_moves(Asm0), - {Asm2,Lc} = insert_labels(Asm1, Lc0, []), - Asm3 = share(Asm2), - Asm4 = move(Asm3), - Asm5 = opt(Asm4, CLabel), - Asm = remove_unused_labels(Asm5), - {{function,Name,Arity,CLabel,Asm},Lc}. + try + Asm1 = eliminate_moves(Asm0), + {Asm2,Lc} = insert_labels(Asm1, Lc0, []), + Asm3 = share(Asm2), + Asm4 = move(Asm3), + Asm5 = opt(Asm4, CLabel), + Asm = remove_unused_labels(Asm5), + {{function,Name,Arity,CLabel,Asm},Lc} + catch + Class:Error:Stack -> + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. %%% %%% Scan instructions in execution order and remove redundant 'move' @@ -196,22 +202,19 @@ no_fallthrough([I|_]) -> is_unreachable_after(I). already_has_value(Lit, Lbl, Reg, D) -> - Key = {Lbl,Reg}, case D of - #{Lbl:=unsafe} -> - false; - #{Key:=Lit} -> + #{Lbl:={Reg,Lit}} -> true; #{} -> false end. update_value_dict([Lit,{f,Lbl}|T], Reg, D0) -> - Key = {Lbl,Reg}, D = case D0 of - #{Key := inconsistent} -> D0; - #{Key := _} -> D0#{Key := inconsistent}; - _ -> D0#{Key => Lit} + #{Lbl:=unsafe} -> D0; + #{Lbl:={Reg,Lit}} -> D0; + #{Lbl:=_} -> D0#{Lbl:=unsafe}; + #{} -> D0#{Lbl=>{Reg,Lit}} end, update_value_dict(T, Reg, D); update_value_dict([], _, D) -> D. @@ -407,7 +410,7 @@ find_fixpoint(OptFun, Is0) -> Is -> find_fixpoint(OptFun, Is) end. -opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc0, St0) -> +opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) -> %% We have %% Test Label Ops %% jump Label @@ -416,23 +419,20 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc0, St0) -> case beam_utils:is_pure_test(I) of false -> %% Test is not pure; we must keep it. - opt(Is, [I|Acc0], label_used(Lbl, St0)); + opt(Is, [I|Acc], label_used(Lbl, St)); true -> %% The test is pure and its failure label is the same %% as in the jump that follows -- thus it is not needed. - %% Check if any of the previous instructions could also be eliminated. - {Acc,St} = opt_useless_loads(Acc0, L, St0), opt(Is, Acc, St) end; -opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc0, St0) -> +opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc, St) -> %% Similar to the above, except we have a fall-through rather than jump %% Test Label Ops %% label Label case beam_utils:is_pure_test(I) of false -> - opt(Is, [I|Acc0], label_used(Lbl, St0)); + opt(Is, [I|Acc], label_used(Lbl, St)); true -> - {Acc,St} = opt_useless_loads(Acc0, L, St0), opt(Is, Acc, St) end; opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) -> @@ -499,46 +499,6 @@ normalize_replace([{From,To0}|Rest], Replace, Acc) -> normalize_replace([], _Replace, Acc) -> maps:from_list(Acc). -%% After eliminating a test, it might happen, that a register was only used -%% in this test. Let's check if that was the case and if it was so, we can -%% eliminate the load into the register completely. -opt_useless_loads([{block,_}|_]=Is, L, #st{index={lazy,FIs}}=St) -> - opt_useless_loads(Is, L, St#st{index=beam_utils:index_labels(FIs)}); -opt_useless_loads([{block,Block0}|Is], L, #st{index=Index}=St) -> - case opt_useless_block_loads(Block0, L, Index) of - [] -> - opt_useless_loads(Is, L, St); - [_|_]=Block -> - {[{block,Block}|Is],St} - end; -%% After eliminating the test and useless blocks, it might happen, -%% that the previous test could also be eliminated. -%% It might be that the label was already marked as used, even if ultimately, -%% it never will be - we can't do much about it at that point, though -opt_useless_loads([{test,_,{f,L},_}=I|Is], L, St) -> - case beam_utils:is_pure_test(I) of - false -> - {[I|Is],St}; - true -> - opt_useless_loads(Is, L, St) - end; -opt_useless_loads(Is, _L, St) -> - {Is,St}. - -opt_useless_block_loads([{set,[Dst],_,_}=I|Is], L, Index) -> - BlockJump = [{block,Is},{jump,{f,L}}], - case beam_utils:is_killed(Dst, BlockJump, Index) of - true -> - %% The register is killed and not used, we can remove the load - opt_useless_block_loads(Is, L, Index); - false -> - [I|opt_useless_block_loads(Is, L, Index)] - end; -opt_useless_block_loads([I|Is], L, Index) -> - [I|opt_useless_block_loads(Is, L, Index)]; -opt_useless_block_loads([], _L, _Index) -> - []. - collect_labels(Is, Label, #st{entry=Entry,replace=Replace} = St) -> collect_labels_1(Is, Label, Entry, Replace, St). diff --git a/lib/compiler/src/beam_listing.erl b/lib/compiler/src/beam_listing.erl index 8a0ce5b50a..6121593b11 100644 --- a/lib/compiler/src/beam_listing.erl +++ b/lib/compiler/src/beam_listing.erl @@ -66,7 +66,7 @@ module(Stream, [_|_]=Fs) -> foreach(fun (F) -> io:format(Stream, "~p.\n", [F]) end, Fs). format_asm([{label,L}|Is]) -> - [" {label,",integer_to_list(L),"}.\n"|format_asm(Is)]; + [io_lib:format(" {label,~p}.\n", [L])|format_asm(Is)]; format_asm([I|Is]) -> [io_lib:format(" ~p", [I]),".\n"|format_asm(Is)]; format_asm([]) -> []. diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl index 1a2e759965..c5e23d2ae0 100644 --- a/lib/compiler/src/beam_ssa.erl +++ b/lib/compiler/src/beam_ssa.erl @@ -198,6 +198,7 @@ no_side_effect(#b_set{op=Op}) -> has_map_field -> true; is_nonempty_list -> true; is_tagged_tuple -> true; + make_fun -> true; put_map -> true; put_list -> true; put_tuple -> true; @@ -578,11 +579,11 @@ used(_) -> []. -spec definitions(Blocks :: block_map()) -> definition_map(). definitions(Blocks) -> - beam_ssa:fold_instrs_rpo(fun(#b_set{ dst = Var }=I, Acc) -> - maps:put(Var, I, Acc); - (_Terminator, Acc) -> - Acc - end, [0], #{}, Blocks). + fold_instrs_rpo(fun(#b_set{ dst = Var }=I, Acc) -> + maps:put(Var, I, Acc); + (_Terminator, Acc) -> + Acc + end, [0], #{}, Blocks). -spec uses(Blocks :: block_map()) -> usage_map(). uses(Blocks) -> @@ -592,7 +593,7 @@ uses(Blocks) -> From :: [label()], Blocks :: block_map(). uses(From, Blocks) -> - beam_ssa:fold_rpo(fun fold_uses_block/3, From, #{}, Blocks). + fold_rpo(fun fold_uses_block/3, From, #{}, Blocks). fold_uses_block(Lbl, #b_blk{is=Is,last=Last}, UseMap0) -> F = fun(I, UseMap) -> @@ -600,7 +601,7 @@ fold_uses_block(Lbl, #b_blk{is=Is,last=Last}, UseMap0) -> Uses0 = maps:get(Var, Acc, []), Uses = [{Lbl, I} | Uses0], maps:put(Var, Uses, Acc) - end, UseMap, beam_ssa:used(I)) + end, UseMap, used(I)) end, F(Last, foldl(F, UseMap0, Is)). @@ -797,8 +798,8 @@ split_blocks_1([L|Ls], P, Blocks0, Count0) -> BefBlk = Blk#b_blk{is=Bef,last=Br}, NewBlk = Blk#b_blk{is=Aft}, Blocks1 = Blocks0#{L:=BefBlk,NewLbl=>NewBlk}, - Successors = beam_ssa:successors(NewBlk), - Blocks = beam_ssa:update_phi_labels(Successors, L, NewLbl, Blocks1), + Successors = successors(NewBlk), + Blocks = update_phi_labels(Successors, L, NewLbl, Blocks1), split_blocks_1([NewLbl|Ls], P, Blocks, Count); no -> split_blocks_1(Ls, P, Blocks0, Count0) diff --git a/lib/compiler/src/beam_ssa_bsm.erl b/lib/compiler/src/beam_ssa_bsm.erl index 2efeb6b5b6..9631bf3334 100644 --- a/lib/compiler/src/beam_ssa_bsm.erl +++ b/lib/compiler/src/beam_ssa_bsm.erl @@ -354,6 +354,11 @@ amb_get_alias(#b_var{}=Arg, Lbl, State) -> error -> {Arg, State} end; +amb_get_alias(#b_remote{mod=Mod0,name=Name0}=Arg0, Lbl, State0) -> + {Mod, State1} = amb_get_alias(Mod0, Lbl, State0), + {Name, State} = amb_get_alias(Name0, Lbl, State1), + Arg = Arg0#b_remote{mod=Mod,name=Name}, + {Arg, State}; amb_get_alias(Arg, _Lbl, State) -> {Arg, State}. @@ -391,15 +396,22 @@ amb_insert_promotions(Blocks0, State) -> Block = maps:get(Lbl, Blocks), Alias = Promotion#b_set.dst, - {Before, After} = splitwith(fun(#b_set{args=Args}) -> - not member(Alias, Args) - end, Block#b_blk.is), + {Before, After} = splitwith( + fun(#b_set{args=Args}) -> + not is_var_in_args(Alias, Args) + end, Block#b_blk.is), Is = Before ++ [Promotion | After], maps:put(Lbl, Block#b_blk{is=Is}, Blocks) end, maps:fold(F, Blocks0, State#amb.promotions). +is_var_in_args(Var, [Var | _]) -> true; +is_var_in_args(Var, [#b_remote{name=Var} | _]) -> true; +is_var_in_args(Var, [#b_remote{mod=Var} | _]) -> true; +is_var_in_args(Var, [_ | Args]) -> is_var_in_args(Var, Args); +is_var_in_args(_Var, []) -> false. + %%% %%% Subpasses %%% @@ -732,6 +744,10 @@ aca_cs_args([Arg | Args], VRs) -> aca_cs_args([], _VRs) -> []. +aca_cs_arg(#b_remote{mod=Mod0,name=Name0}=Rem, VRs) -> + Mod = aca_cs_arg(Mod0, VRs), + Name = aca_cs_arg(Name0, VRs), + Rem#b_remote{mod=Mod,name=Name}; aca_cs_arg(Arg, VRs) -> case VRs of #{ Arg := New } -> New; diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl index 1c7563faa0..3c14062d0b 100644 --- a/lib/compiler/src/beam_ssa_codegen.erl +++ b/lib/compiler/src/beam_ssa_codegen.erl @@ -231,7 +231,7 @@ need_heap_never(_) -> false. need_heap_blks([{L,#cg_blk{is=Is0}=Blk0}|Bs], H0, Acc) -> {Is1,H1} = need_heap_is(reverse(Is0), H0, []), - {Ns,H} = need_heap_terminator(Bs, H1), + {Ns,H} = need_heap_terminator(Bs, L, H1), Is = Ns ++ Is1, Blk = Blk0#cg_blk{is=Is}, need_heap_blks(Bs, H, [{L,Blk}|Acc]); @@ -241,6 +241,13 @@ need_heap_blks([], H, Acc) -> need_heap_is([#cg_alloc{words=Words}=Alloc0|Is], N, Acc) -> Alloc = Alloc0#cg_alloc{words=add_heap_words(N, Words)}, need_heap_is(Is, #need{}, [Alloc|Acc]); +need_heap_is([#cg_set{anno=Anno,op=bs_init}=I0|Is], N, Acc) -> + Alloc = case need_heap_need(N) of + [#cg_alloc{words=Need}] -> alloc(Need); + [] -> 0 + end, + I = I0#cg_set{anno=Anno#{alloc=>Alloc}}, + need_heap_is(Is, #need{}, [I|Acc]); need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) -> case classify_heap_need(Op, Args) of {put,Words} -> @@ -256,11 +263,31 @@ need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) -> need_heap_is([], N, Acc) -> {Acc,N}. -need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=Same,fail=Same}}}|_], N) -> +need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=L,fail=L}}}|_], L, N) -> + %% Fallthrough. {[],N}; -need_heap_terminator([{_,#cg_blk{}}|_], N) -> +need_heap_terminator([{_,#cg_blk{is=Is,last=#cg_br{succ=L}}}|_], L, N) -> + case need_heap_need(N) of + [] -> + {[],#need{}}; + [_|_]=Alloc -> + %% If the preceding instructions are a binary construction, + %% hoist the allocation and incorporate into the bs_init + %% instruction. + case reverse(Is) of + [#cg_set{op=succeeded},#cg_set{op=bs_init}|_] -> + {[],N}; + [#cg_set{op=bs_put}|_] -> + {[],N}; + _ -> + %% Not binary construction. Must emit an allocation + %% instruction in this block. + {Alloc,#need{}} + end + end; +need_heap_terminator([{_,#cg_blk{}}|_], _, N) -> {need_heap_need(N),#need{}}; -need_heap_terminator([], H) -> +need_heap_terminator([], _, H) -> {need_heap_need(H),#need{}}. need_heap_need(#need{h=0,f=0}) -> []; @@ -1041,12 +1068,13 @@ cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I, #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) -> Fail = bif_fail(Fail0), Line = line(Anno), + Alloc = map_get(alloc, Anno), [#b_literal{val=Kind}|Args1] = Args0, case Kind of new -> [Dst,Size,{integer,Unit}] = beam_args([Dst0|Args1], St), Live = get_live(I), - {[Line|cg_bs_init(Dst, Size, Unit, Live, Fail)],St}; + {[Line|cg_bs_init(Dst, Size, Alloc, Unit, Live, Fail)],St}; private_append -> [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St), Flags = {field_flags,[]}, @@ -1056,7 +1084,7 @@ cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I, [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St), Flags = {field_flags,[]}, Live = get_live(I), - Is = [Line,{bs_append,Fail,Bits,0,Live,Unit,Src,Flags,Dst}], + Is = [Line,{bs_append,Fail,Bits,Alloc,Live,Unit,Src,Flags,Dst}], {Is,St} end; cg_block([#cg_set{anno=Anno,op=bs_start_match,dst=Ctx0,args=[Bin0]}=I, @@ -1204,6 +1232,12 @@ cg_copy_1([#cg_set{dst=Dst0,args=Args}|T], St) -> end; cg_copy_1([], _St) -> []. +-define(IS_LITERAL(Val), (Val =:= nil orelse + element(1, Val) =:= integer orelse + element(1, Val) =:= float orelse + element(1, Val) =:= atom orelse + element(1, Val) =:= literal)). + bif_to_test('and', [V1,V2], Fail) -> [{test,is_eq_exact,Fail,[V1,{atom,true}]}, {test,is_eq_exact,Fail,[V2,{atom,true}]}]; @@ -1217,15 +1251,99 @@ bif_to_test('or', [V1,V2], {f,Lbl}=Fail) when Lbl =/= 0 -> bif_to_test('not', [Var], Fail) -> [{test,is_eq_exact,Fail,[Var,{atom,false}]}]; bif_to_test(Name, Args, Fail) -> - [beam_utils:bif_to_test(Name, Args, Fail)]. + [bif_to_test_1(Name, Args, Fail)]. + +bif_to_test_1(is_atom, [_]=Ops, Fail) -> + {test,is_atom,Fail,Ops}; +bif_to_test_1(is_boolean, [_]=Ops, Fail) -> + {test,is_boolean,Fail,Ops}; +bif_to_test_1(is_binary, [_]=Ops, Fail) -> + {test,is_binary,Fail,Ops}; +bif_to_test_1(is_bitstring,[_]=Ops, Fail) -> + {test,is_bitstr,Fail,Ops}; +bif_to_test_1(is_float, [_]=Ops, Fail) -> + {test,is_float,Fail,Ops}; +bif_to_test_1(is_function, [_]=Ops, Fail) -> + {test,is_function,Fail,Ops}; +bif_to_test_1(is_function, [_,_]=Ops, Fail) -> + {test,is_function2,Fail,Ops}; +bif_to_test_1(is_integer, [_]=Ops, Fail) -> + {test,is_integer,Fail,Ops}; +bif_to_test_1(is_list, [_]=Ops, Fail) -> + {test,is_list,Fail,Ops}; +bif_to_test_1(is_map, [_]=Ops, Fail) -> + {test,is_map,Fail,Ops}; +bif_to_test_1(is_number, [_]=Ops, Fail) -> + {test,is_number,Fail,Ops}; +bif_to_test_1(is_pid, [_]=Ops, Fail) -> + {test,is_pid,Fail,Ops}; +bif_to_test_1(is_port, [_]=Ops, Fail) -> + {test,is_port,Fail,Ops}; +bif_to_test_1(is_reference, [_]=Ops, Fail) -> + {test,is_reference,Fail,Ops}; +bif_to_test_1(is_tuple, [_]=Ops, Fail) -> + {test,is_tuple,Fail,Ops}; +bif_to_test_1('=<', [A,B], Fail) -> + {test,is_ge,Fail,[B,A]}; +bif_to_test_1('>', [A,B], Fail) -> + {test,is_lt,Fail,[B,A]}; +bif_to_test_1('<', [_,_]=Ops, Fail) -> + {test,is_lt,Fail,Ops}; +bif_to_test_1('>=', [_,_]=Ops, Fail) -> + {test,is_ge,Fail,Ops}; +bif_to_test_1('==', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_eq,Fail,[A,C]}; +bif_to_test_1('==', [_,_]=Ops, Fail) -> + {test,is_eq,Fail,Ops}; +bif_to_test_1('/=', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_ne,Fail,[A,C]}; +bif_to_test_1('/=', [_,_]=Ops, Fail) -> + {test,is_ne,Fail,Ops}; +bif_to_test_1('=:=', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_eq_exact,Fail,[A,C]}; +bif_to_test_1('=:=', [_,_]=Ops, Fail) -> + {test,is_eq_exact,Fail,Ops}; +bif_to_test_1('=/=', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_ne_exact,Fail,[A,C]}; +bif_to_test_1('=/=', [_,_]=Ops, Fail) -> + {test,is_ne_exact,Fail,Ops}. opt_call_moves(Is0, Arity) -> {Moves0,Is} = splitwith(fun({move,_,_}) -> true; + ({kill,_}) -> true; (_) -> false end, Is0), Moves = opt_call_moves_1(Moves0, Arity), Moves ++ Is. +opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1|[{kill,_}|_]=Is], Arity) -> + %% There could be a {move,Tmp,{x,0}} instruction after the + %% kill/1 instructions (moved to there by opt_move_to_x0/1). + case splitwith(fun({kill,_}) -> true; + (_) -> false + end, Is) of + {Kills,[{move,{x,_}=Tmp,{x,0}}=M2]} -> + %% The two move/2 instructions (M1 and M2) can be combined + %% to one. The question is, though, is it safe to place + %% them after the kill/1 instructions? + case is_killed(Src, Kills, Arity) of + true -> + %% Src (a Y register) is killed by one of the + %% kill/1 instructions. Thus M1 and M2 + %% must be placed before the kill/1 instructions + %% (essentially undoing what opt_move_to_x0/1 + %% did, which turned out to be a pessimization + %% in this case). + opt_call_moves_1([M1,M2|Kills], Arity); + false -> + %% Src is not killed by any of the kill/1 + %% instructions. Thus it is safe to place + %% M1 and M2 after the kill/1 instructions. + opt_call_moves_1(Kills++[M1,M2], Arity) + end; + {_,_} -> + [M1|Is] + end; opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1,{move,Tmp,Dst}=M2|Is], Arity) -> case is_killed(Tmp, Is, Arity) of true -> @@ -1239,6 +1357,10 @@ opt_call_moves_1([M|Ms], Arity) -> [M|opt_call_moves_1(Ms, Arity)]; opt_call_moves_1([], _Arity) -> []. +is_killed(Y, [{kill,Y}|_], _) -> + true; +is_killed(R, [{kill,_}|Is], Arity) -> + is_killed(R, Is, Arity); is_killed(R, [{move,R,_}|_], _) -> false; is_killed(R, [{move,_,R}|_], _) -> @@ -1246,7 +1368,9 @@ is_killed(R, [{move,_,R}|_], _) -> is_killed(R, [{move,_,_}|Is], Arity) -> is_killed(R, Is, Arity); is_killed({x,X}, [], Arity) -> - X >= Arity. + X >= Arity; +is_killed({y,_}, [], _) -> + false. cg_alloc(#cg_alloc{stack=none,words=#need{h=0,f=0}}, _St) -> []; @@ -1527,13 +1651,13 @@ cg_bs_put(Fail, [{atom,Type},{literal,Flags}|Args]) -> [{Op,Fail,{field_flags,Flags},Src}] end. -cg_bs_init(Dst, Size0, Unit, Live, Fail) -> +cg_bs_init(Dst, Size0, Alloc, Unit, Live, Fail) -> Op = case Unit of 1 -> bs_init_bits; 8 -> bs_init2 end, Size = cg_bs_init_size(Size0), - [{Op,Fail,Size,0,Live,{field_flags,[]},Dst}]. + [{Op,Fail,Size,Alloc,Live,{field_flags,[]},Dst}]. cg_bs_init_size({x,_}=R) -> R; cg_bs_init_size({y,_}=R) -> R; @@ -1652,12 +1776,41 @@ phi_copies([#b_set{dst=Dst,args=PhiArgs}|Sets], L) -> [#cg_set{op=copy,dst=Dst,args=CopyArgs}|phi_copies(Sets, L)]; phi_copies([], _) -> []. +%% opt_move_to_x0([Instruction]) -> [Instruction]. +%% Simple peep-hole optimization to move a {move,Any,{x,0}} past +%% any kill up to the next call instruction. (To give the loader +%% an opportunity to combine the 'move' and the 'call' instructions.) + +opt_move_to_x0(Moves) -> + opt_move_to_x0(Moves, []). + +opt_move_to_x0([{move,_,{x,0}}=I|Is0], Acc0) -> + case move_past_kill(Is0, I, Acc0) of + impossible -> opt_move_to_x0(Is0, [I|Acc0]); + {Is,Acc} -> opt_move_to_x0(Is, Acc) + end; +opt_move_to_x0([I|Is], Acc) -> + opt_move_to_x0(Is, [I|Acc]); +opt_move_to_x0([], Acc) -> reverse(Acc). + +move_past_kill([{kill,Src}|_], {move,Src,_}, _) -> + impossible; +move_past_kill([{kill,_}=I|Is], Move, Acc) -> + move_past_kill(Is, Move, [I|Acc]); +move_past_kill(Is, Move, Acc) -> + {Is,[Move|Acc]}. + %% setup_args(Args, Anno, Context) -> [Instruction]. %% setup_args(Args) -> [Instruction]. %% Set up X registers for a call. setup_args(Args, Anno, none, St) -> - setup_args(Args) ++ kill_yregs(Anno, St); + case {setup_args(Args),kill_yregs(Anno, St)} of + {Moves,[]} -> + Moves; + {Moves,Kills} -> + opt_move_to_x0(Moves ++ Kills) + end; setup_args(Args, _, _, _) -> setup_args(Args). diff --git a/lib/compiler/src/beam_ssa_dead.erl b/lib/compiler/src/beam_ssa_dead.erl index c20652580d..067d9a6741 100644 --- a/lib/compiler/src/beam_ssa_dead.erl +++ b/lib/compiler/src/beam_ssa_dead.erl @@ -135,7 +135,8 @@ shortcut_terminator(Last, _Is, _Bs, _St) -> Last. shortcut_switch([{Lit,L0}|T], Bool, Bs, St0) -> - St = St0#st{rel_op=normalize_op({bif,'=:='}, [Bool,Lit])}, + RelOp = {'=:=',Bool,Lit}, + St = St0#st{rel_op=RelOp}, #b_br{bool=#b_literal{val=true},succ=L} = shortcut(L0, bind_var(Bool, Lit, Bs), St#st{target=one_way}), [{Lit,L}|shortcut_switch(T, Bool, Bs, St0)]; @@ -388,41 +389,43 @@ eval_terminator(#b_switch{arg=Arg,fail=Fail,list=List}=Sw, Bs, St) -> %% Literal argument. Simplify to a `br`. beam_ssa:normalize(Sw#b_switch{arg=Val}); #b_var{} -> - case St of - #st{rel_op=none} -> - %% No previous relational operator is stored. - %% Give up. + %% Try optimizing the switch. + case eval_switch(List, Arg, St, Fail) of + none -> none; - #st{} -> - %% There is a previous relational operator stored. - %% Try optimizing the switch. - case eval_switch(List, Arg, St, Fail) of - none -> - none; - To when is_integer(To) -> - %% Either one of the values in the switch - %% matched a previous value in a '=:=' test, or - %% none of the values matched a previous test. - #b_br{bool=#b_literal{val=true},succ=To,fail=To} - end + To when is_integer(To) -> + %% Either one of the values in the switch + %% matched a previous value in a '=:=' test, or + %% none of the values matched a previous test. + #b_br{bool=#b_literal{val=true},succ=To,fail=To} end end; eval_terminator(#b_ret{}, _Bs, _St) -> none. -eval_switch([{Lit,Lbl}|T], Arg, St, Fail) -> - case eval_rel_op({bif,'=:='}, [Arg,Lit], St) of - none -> - %% This label could be reached. - eval_switch(T, Arg, St, none); - #b_literal{val=false} -> - %% This branch will never be taken. - eval_switch(T, Arg, St, Fail); - #b_literal{val=true} -> +eval_switch(List, Arg, #st{rel_op={_,Arg,_}=PrevOp}, Fail) -> + %% There is a previous relational operator testing the same variable. + %% Optimization may be possible. + eval_switch_1(List, Arg, PrevOp, Fail); +eval_switch(_, _, _, _) -> + %% There is either no previous relational operator, or it tests + %% a different variable. Nothing to optimize. + none. + +eval_switch_1([{Lit,Lbl}|T], Arg, PrevOp, Fail) -> + RelOp = {'=:=',Arg,Lit}, + case will_succeed(PrevOp, RelOp) of + yes -> %% Success. This branch will always be taken. - Lbl + Lbl; + no -> + %% This branch will never be taken. + eval_switch_1(T, Arg, PrevOp, Fail); + maybe -> + %% This label could be reached. + eval_switch_1(T, Arg, PrevOp, none) end; -eval_switch([], _Arg, _St, Fail) -> +eval_switch_1([], _Arg, _PrevOp, Fail) -> %% Fail is now either the failure label or 'none'. Fail. diff --git a/lib/compiler/src/beam_ssa_funs.erl b/lib/compiler/src/beam_ssa_funs.erl new file mode 100644 index 0000000000..38df50fd74 --- /dev/null +++ b/lib/compiler/src/beam_ssa_funs.erl @@ -0,0 +1,149 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +%%% +%%% If a fun is defined locally and only used for calls, it can be replaced +%%% with direct calls to the relevant function. This greatly speeds up "named +%%% functions" (which rely on make_fun to recreate themselves) and macros that +%%% wrap their body in a fun. +%%% + +-module(beam_ssa_funs). + +-export([module/2]). + +-include("beam_ssa.hrl"). + +-import(lists, [foldl/3]). + +-spec module(Module, Options) -> Result when + Module :: beam_ssa:b_module(), + Options :: [compile:option()], + Result :: {ok, beam_ssa:b_module()}. + +module(#b_module{body=Fs0}=Module, _Opts) -> + Trampolines = foldl(fun find_trampolines/2, #{}, Fs0), + Fs = [lfo(F, Trampolines) || F <- Fs0], + {ok, Module#b_module{body=Fs}}. + +%% If a function does absolutely nothing beyond calling another function with +%% the same arguments in the same order, we can shave off a call by short- +%% circuiting it. +find_trampolines(#b_function{args=Args,bs=Blocks}=F, Trampolines) -> + case maps:get(0, Blocks) of + #b_blk{is=[#b_set{op=call, + args=[#b_local{}=Actual | Args], + dst=Dst}], + last=#b_ret{arg=Dst}} -> + {_, Name, Arity} = beam_ssa:get_anno(func_info, F), + Trampoline = #b_local{name=#b_literal{val=Name},arity=Arity}, + maps:put(Trampoline, Actual, Trampolines); + _ -> + Trampolines + end. + +lfo(#b_function{bs=Blocks0}=F, Trampolines) -> + Linear0 = beam_ssa:linearize(Blocks0), + Linear = lfo_optimize(Linear0, lfo_analyze(Linear0, #{}), Trampolines), + F#b_function{bs=maps:from_list(Linear)}. + +%% Gather a map of the locally defined funs that are only used for calls. +lfo_analyze([{_L,#b_blk{is=Is,last=Last}}|Bs], LFuns0) -> + LFuns = lfo_analyze_last(Last, lfo_analyze_is(Is, LFuns0)), + lfo_analyze(Bs, LFuns); +lfo_analyze([], LFuns) -> + LFuns. + +lfo_analyze_is([#b_set{op=make_fun, + dst=Dst, + args=[#b_local{} | FreeVars]}=Def | Is], + LFuns0) -> + LFuns = maps:put(Dst, Def, maps:without(FreeVars, LFuns0)), + lfo_analyze_is(Is, LFuns); +lfo_analyze_is([#b_set{op=call, + args=[Fun | CallArgs]} | Is], + LFuns) when is_map_key(Fun, LFuns) -> + #b_set{args=[#b_local{arity=Arity} | FreeVars]} = maps:get(Fun, LFuns), + case length(CallArgs) + length(FreeVars) of + Arity -> + lfo_analyze_is(Is, maps:without(CallArgs, LFuns)); + _ -> + %% This will `badarity` at runtime, and it's easier to disable the + %% optimization than to simulate it. + lfo_analyze_is(Is, maps:without([Fun | CallArgs], LFuns)) + end; +lfo_analyze_is([#b_set{args=Args} | Is], LFuns) when map_size(LFuns) =/= 0 -> + %% We disqualify funs that are used outside calls because this forces them + %% to be created anyway, and the slight performance gain from direct calls + %% is not enough to offset the potential increase in stack frame size (the + %% free variables need to be kept alive until the call). + %% + %% This is also a kludge to make HiPE work, as the latter will generate + %% code with the assumption that the functions referenced in a make_fun + %% will only be used by funs, which will not be the case if we mix it with + %% direct calls. See cerl_cconv.erl for details. + %% + %% Future optimizations like delaying fun creation until use may require us + %% to copy affected functions so that HiPE gets its own to play with (until + %% HiPE is fixed anyway). + lfo_analyze_is(Is, maps:without(Args, LFuns)); +lfo_analyze_is([_ | Is], LFuns) -> + lfo_analyze_is(Is, LFuns); +lfo_analyze_is([], LFuns) -> + LFuns. + +lfo_analyze_last(#b_switch{arg=Arg}, LFuns) -> + maps:remove(Arg, LFuns); +lfo_analyze_last(#b_ret{arg=Arg}, LFuns) -> + maps:remove(Arg, LFuns); +lfo_analyze_last(_, LFuns) -> + LFuns. + +%% Replace all calls of suitable funs with a direct call to their +%% implementation. Liveness optimization will get rid of the make_fun +%% instruction. +lfo_optimize(Linear, LFuns, _Trampolines) when map_size(LFuns) =:= 0 -> + Linear; +lfo_optimize(Linear, LFuns, Trampolines) -> + lfo_optimize_1(Linear, LFuns, Trampolines). + +lfo_optimize_1([{L,#b_blk{is=Is0}=Blk}|Bs], LFuns, Trampolines) -> + Is = lfo_optimize_is(Is0, LFuns, Trampolines), + [{L,Blk#b_blk{is=Is}} | lfo_optimize_1(Bs, LFuns, Trampolines)]; +lfo_optimize_1([], _LFuns, _Trampolines) -> + []. + +lfo_optimize_is([#b_set{op=call, + args=[Fun | CallArgs]}=Call0 | Is], + LFuns, Trampolines) when is_map_key(Fun, LFuns) -> + #b_set{args=[Local | FreeVars]} = maps:get(Fun, LFuns), + Args = [lfo_short_circuit(Local, Trampolines) | CallArgs ++ FreeVars], + Call = beam_ssa:add_anno(local_fun_opt, Fun, Call0#b_set{args=Args}), + [Call | lfo_optimize_is(Is, LFuns, Trampolines)]; +lfo_optimize_is([I | Is], LFuns, Trampolines) -> + [I | lfo_optimize_is(Is, LFuns, Trampolines)]; +lfo_optimize_is([], _LFuns, _Trampolines) -> + []. + +lfo_short_circuit(Call, Trampolines) -> + case maps:find(Call, Trampolines) of + {ok, Other} -> lfo_short_circuit(Other, Trampolines); + error -> Call + end. diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index 36137ef046..c60c6da9ea 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -131,6 +131,10 @@ passes(Opts) -> ?PASS(find_yregs), ?PASS(reserve_yregs), + %% Handle legacy binary match instruction that don't + %% accept a Y register as destination. + ?PASS(legacy_bs), + %% Improve reuse of Y registers to potentially %% reduce the size of the stack frame. ?PASS(copy_retval), @@ -612,6 +616,59 @@ bs_subst_ctx(#b_var{}=Var, CtxChain) -> bs_subst_ctx(Other, _CtxChain) -> Other. +%% legacy_bs(St0) -> St. +%% Binary matching instructions in OTP 21 and earlier don't support +%% a Y register as destination. If St#st.use_bsm3 is false, +%% we will need to rewrite those instructions so that the result +%% is first put in an X register and then moved to a Y register +%% if the operation succeeded. + +legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) -> + IsYreg = maps:from_list([{V,true} || {V,{y,_}} <- Res]), + Linear0 = beam_ssa:linearize(Blocks0), + {Linear,Count} = legacy_bs(Linear0, IsYreg, Count0, #{}, []), + Blocks = maps:from_list(Linear), + St#st{ssa=Blocks,cnt=Count}; +legacy_bs(#st{use_bsm3=true}=St) -> St. + +legacy_bs([{L,Blk}|Bs], IsYreg, Count0, Copies0, Acc) -> + #b_blk{is=Is0,last=Last} = Blk, + Is1 = case Copies0 of + #{L:=Copy} -> [Copy|Is0]; + #{} -> Is0 + end, + {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []), + legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]); +legacy_bs([], _IsYreg, Count, _Copies, Acc) -> + {Acc,Count}. + +legacy_bs_is([#b_set{op=Op,dst=Dst}=I0, + #b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0], + Last, IsYreg, Count0, Copies0, Acc) -> + NeedsFix = is_map_key(Dst, IsYreg) andalso + case Op of + bs_get -> true; + bs_init -> true; + _ -> false + end, + case NeedsFix of + true -> + TempDst = #b_var{name={'@bs_temp_dst',Count0}}, + Count = Count0 + 1, + I = I0#b_set{dst=TempDst}, + SuccI = SuccI0#b_set{args=[TempDst]}, + Copy = #b_set{op=copy,dst=Dst,args=[TempDst]}, + #b_br{bool=SuccDst,succ=SuccL} = Last, + Copies = Copies0#{SuccL=>Copy}, + legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]); + false -> + legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc]) + end; +legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) -> + legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]); +legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) -> + {reverse(Acc),Count,Copies}. + %% sanitize(St0) -> St. %% Remove constructs that can cause problems later: %% @@ -652,21 +709,24 @@ sanitize([], Count, Blocks0, Values) -> false -> remove_unreachable(Ls, Blocks, Reachable, []) end,Count}. -sanitize_is([#b_set{op=get_map_element, - args=[#b_literal{}=Map,Key]}=I0|Is], - Count0, Values, _Changed, Acc) -> - {MapVar,Count} = new_var('@ssa_map', Count0), - I = I0#b_set{args=[MapVar,Key]}, - Copy = #b_set{op=copy,dst=MapVar,args=[Map]}, - sanitize_is(Is, Count, Values, true, [I,Copy|Acc]); +sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is], + Count0, Values, Changed, Acc) -> + case sanitize_args(Args0, Values) of + [#b_literal{}=Map,Key] -> + %% Bind the literal map to a variable. + {MapVar,Count} = new_var('@ssa_map', Count0), + I = I0#b_set{args=[MapVar,Key]}, + Copy = #b_set{op=copy,dst=MapVar,args=[Map]}, + sanitize_is(Is, Count, Values, true, [I,Copy|Acc]); + [_,_]=Args0 -> + sanitize_is(Is, Count0, Values, Changed, [I0|Acc]); + [_,_]=Args -> + I = I0#b_set{args=Args}, + sanitize_is(Is, Count0, Values, Changed, [I|Acc]) + end; sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0], - Count, Values, Changed, Acc) -> - Args = map(fun(Var) -> - case Values of - #{Var:=New} -> New; - #{} -> Var - end - end, Args0), + Count, Values, Changed0, Acc) -> + Args = sanitize_args(Args0, Values), case sanitize_instr(Op, Args, I0) of {value,Value0} -> Value = #b_literal{val=Value0}, @@ -674,7 +734,9 @@ sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0], {ok,I} -> sanitize_is(Is0, Count, Values, true, [I|Acc]); ok -> - sanitize_is(Is0, Count, Values, Changed, [I0|Acc]) + I = I0#b_set{args=Args}, + Changed = Changed0 orelse Args =/= Args0, + sanitize_is(Is0, Count, Values, Changed, [I|Acc]) end; sanitize_is([], Count, Values, Changed, Acc) -> case Changed of @@ -684,6 +746,14 @@ sanitize_is([], Count, Values, Changed, Acc) -> no_change end. +sanitize_args(Args, Values) -> + map(fun(Var) -> + case Values of + #{Var:=New} -> New; + #{} -> Var + end + end, Args). + sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) -> case erl_bifs:is_pure(erlang, Bif, 1) of false -> @@ -973,11 +1043,11 @@ need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) -> #b_remote{} -> %% This is an apply(), which always needs a frame. true; - #b_var{} -> - %% A fun call always needs a frame. - true; + #b_local{} -> + Context =:= body orelse Is =/= []; _ -> - Context =:= body orelse Is =/= [] + %% A fun call always needs a frame. + true end; need_frame_1([I|Is], Context) -> beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context); @@ -2141,7 +2211,13 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) -> Free = init_free(maps:to_list(Res)), Intervals1 = [init_interval(Int, Res) || Int <- Intervals0], Intervals = sort(Intervals1), - IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end, + IsReserved = fun(#i{reg=Reg}) -> + case Reg of + none -> false; + {prefer,{_,_}} -> false; + {_,_} -> true + end + end, {UnhandledRes,Unhandled} = partition(IsReserved, Intervals), L = #l{unhandled_res=UnhandledRes, unhandled_any=Unhandled,free=Free}, diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl index 18e6e73a46..95fc3bb0e9 100644 --- a/lib/compiler/src/beam_ssa_type.erl +++ b/lib/compiler/src/beam_ssa_type.erl @@ -27,9 +27,10 @@ -define(UNICODE_INT, #t_integer{elements={0,16#10FFFF}}). --record(d, {ds :: #{beam_ssa:var_name():=beam_ssa:b_set()}, +-record(d, {ds :: #{beam_ssa:b_var():=beam_ssa:b_set()}, ls :: #{beam_ssa:label():=type_db()}, - sub :: #{beam_ssa:var_name():=beam_ssa:value()} + once :: cerl_sets:set(beam_ssa:b_var()), + sub :: #{beam_ssa:b_var():=beam_ssa:value()} }). -define(ATOM_SET_SIZE, 5). @@ -56,13 +57,15 @@ Block :: beam_ssa:b_blk(). opt(Linear, Args) -> + UsedOnce = used_once(Linear, Args), Ts = maps:from_list([{V,any} || #b_var{}=V <- Args]), FakeCall = #b_set{op=call,args=[#b_remote{mod=#b_literal{val=unknown}, name=#b_literal{val=unknown}, arity=0}]}, Defs = maps:from_list([{Var,FakeCall#b_set{dst=Var}} || #b_var{}=Var <- Args]), - D = #d{ds=Defs,ls=#{0=>Ts},sub=#{}}, + D = #d{ds=Defs,ls=#{0=>Ts,?BADARG_BLOCK=>#{}}, + once=UsedOnce,sub=#{}}, opt_1(Linear, D). opt_1([{L,Blk}|Bs], #d{ls=Ls}=D) -> @@ -425,16 +428,43 @@ opt_terminator(#b_ret{}=Ret, _Ts, _Ds) -> Ret. update_successors(#b_br{bool=#b_literal{val=true},succ=S}, Ts, D) -> update_successor(S, Ts, D); -update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts, D0) -> - D = update_successor_bool(Bool, false, Fail, Ts, D0), - SuccTs = infer_types(Bool, Ts, D0), - update_successor_bool(Bool, true, Succ, SuccTs, D); -update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts, D0) -> - D = update_successor(Fail, Ts, D0), - foldl(fun({Val,S}, A) -> - T = get_type(Val, Ts), - update_successor(S, Ts#{V=>T}, A) - end, D, List); +update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts0, D0) -> + case cerl_sets:is_element(Bool, D0#d.once) of + true -> + %% This variable is defined in this block and is only + %% referenced by this br terminator. Therefore, there is + %% no need to include the type database passed on to the + %% successors of this block. + Ts = maps:remove(Bool, Ts0), + D = update_successor(Fail, Ts, D0), + SuccTs = infer_types(Bool, Ts, D0), + update_successor(Succ, SuccTs, D); + false -> + D = update_successor_bool(Bool, false, Fail, Ts0, D0), + SuccTs = infer_types(Bool, Ts0, D0), + update_successor_bool(Bool, true, Succ, SuccTs, D) + end; +update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts0, D0) -> + case cerl_sets:is_element(V, D0#d.once) of + true -> + %% This variable is defined in this block and is only + %% referenced by this switch terminator. Therefore, there is + %% no need to include the type database passed on to the + %% successors of this block. + Ts = maps:remove(V, Ts0), + D = update_successor(Fail, Ts, D0), + F = fun({_Val,S}, A) -> + update_successor(S, Ts, A) + end, + foldl(F, D, List); + false -> + D = update_successor(Fail, Ts0, D0), + F = fun({Val,S}, A) -> + T = get_type(Val, Ts0), + update_successor(S, Ts0#{V=>T}, A) + end, + foldl(F, D, List) + end; update_successors(#b_ret{}, _Ts, D) -> D. update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) -> @@ -447,6 +477,11 @@ update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) -> update_successor(S, Ts, D) end. +update_successor(?BADARG_BLOCK, _Ts, #d{}=D) -> + %% We KNOW that no variables are used in the ?BADARG_BLOCK, + %% so there is no need to update the type information. That + %% can be a huge timesaver for huge functions. + D; update_successor(S, Ts0, #d{ls=Ls}=D) -> case Ls of #{S:=Ts1} -> @@ -766,6 +801,48 @@ simplify_not(#b_br{bool=#b_var{}=V,succ=Succ,fail=Fail}=Br0, Ts, Ds) -> Br0 end. +%%% +%%% Calculate the set of variables that are only used once in the +%%% block that they are defined in. That will allow us to discard type +%%% information for variables that will never be referenced by the +%%% successor blocks, potentially improving compilation times. +%%% + +used_once(Linear, Args) -> + Map0 = used_once_1(reverse(Linear), #{}), + Map = maps:without(Args, Map0), + cerl_sets:from_list(maps:keys(Map)). + +used_once_1([{L,#b_blk{is=Is,last=Last}}|Bs], Uses0) -> + Uses = used_once_2([Last|reverse(Is)], L, Uses0), + used_once_1(Bs, Uses); +used_once_1([], Uses) -> Uses. + +used_once_2([I|Is], L, Uses0) -> + Uses = used_once_uses(beam_ssa:used(I), L, Uses0), + case I of + #b_set{dst=Dst} -> + case Uses of + #{Dst:=[L]} -> + used_once_2(Is, L, Uses); + #{} -> + used_once_2(Is, L, maps:remove(Dst, Uses)) + end; + _ -> + used_once_2(Is, L, Uses) + end; +used_once_2([], _, Uses) -> Uses. + +used_once_uses([V|Vs], L, Uses) -> + case Uses of + #{V:=Us} -> + used_once_uses(Vs, L, Uses#{V:=[L|Us]}); + #{} -> + used_once_uses(Vs, L, Uses#{V=>[L]}) + end; +used_once_uses([], _, Uses) -> Uses. + + get_types(Values, Ts) -> [get_type(Val, Ts) || Val <- Values]. -spec get_type(beam_ssa:value(), type_db()) -> type(). diff --git a/lib/compiler/src/beam_trim.erl b/lib/compiler/src/beam_trim.erl index 1acbedd45b..51ff580a7a 100644 --- a/lib/compiler/src/beam_trim.erl +++ b/lib/compiler/src/beam_trim.erl @@ -21,12 +21,11 @@ -module(beam_trim). -export([module/2]). --import(lists, [reverse/1,reverse/2,splitwith/2,sort/1]). +-import(lists, [any/2,member/2,reverse/1,reverse/2,splitwith/2,sort/1]). -record(st, - {safe :: gb_sets:set(beam_asm:label()), %Safe labels. - lbl :: beam_utils:code_index() %Code at each label. - }). + {safe :: cerl_sets:set(beam_asm:label()) %Safe labels. + }). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. @@ -36,10 +35,15 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opts) -> {ok,{Mod,Exp,Attr,Fs,Lc}}. function({function,Name,Arity,CLabel,Is0}) -> - %%ok = io:fwrite("~w: ~p\n", [?LINE,{Name,Arity}]), - St = #st{safe=safe_labels(Is0, []),lbl=beam_utils:index_labels(Is0)}, - Is = trim(Is0, St, []), - {function,Name,Arity,CLabel,Is}. + try + St = #st{safe=safe_labels(Is0, [])}, + Is = trim(Is0, St, []), + {function,Name,Arity,CLabel,Is} + catch + Class:Error:Stack -> + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. trim([{kill,_}|_]=Is0, St, Acc) -> {Kills0,Is1} = splitwith(fun({kill,_}) -> true; @@ -47,14 +51,33 @@ trim([{kill,_}|_]=Is0, St, Acc) -> end, Is0), Kills = sort(Kills0), try - {FrameSize,Layout} = frame_layout(Is1, Kills, St), - Configs = trim_instructions(Layout), - try_remap(Configs, Is1, FrameSize) - of + %% Find out the size and layout of the stack frame. + %% Example of a layout: + %% + %% [{kill,{y,0}},{dead,{y,1},{live,{y,2}},{kill,{y,3}}] + %% + %% That means that y0 and y3 are to be killed, that y1 + %% has been killed previously, and that y2 is live. + {FrameSize,Layout} = frame_layout(Is1, Kills, St), + + %% Calculate all recipes that are not worse in terms + %% of estimated execution time. The recipes are ordered + %% in descending order from how much they trim. + Recipes = trim_recipes(Layout), + + %% Try the recipes in order. A recipe may not work out because + %% a register that was previously killed may be + %% resurrected. If that happens, the next recipe, which trims + %% less, will be tried. + try_remap(Recipes, Is1, FrameSize) + of {Is,TrimInstr} -> + %% One of the recipes was applied. trim(Is, St, reverse(TrimInstr)++Acc) catch not_possible -> + %% No recipe worked out. Use the original kill + %% instructions. trim(Is1, St, reverse(Kills, Acc)) end; trim([I|Is], St, Acc) -> @@ -62,34 +85,42 @@ trim([I|Is], St, Acc) -> trim([], _, Acc) -> reverse(Acc). -%% trim_instructions([{kill,R}|{live,R}|{dead,R}]) -> {[Instruction],MapFun} -%% Figure out the sequence of moves and trim to use. +%% trim_recipes([{kill,R}|{live,R}|{dead,R}]) -> [Recipe]. +%% Recipe = {Kills,NumberToTrim,Moves} +%% Kills = [{kill,Y}] +%% Moves = [{move,SrcY,DstY}] +%% +%% Calculate how to best trim the stack and kill the correct +%% Y registers. Return a list of possible recipes. The best +%% recipe (the one that trims the most) is first in the list. +%% All of the recipes are no worse in estimated execution time +%% than the original sequences of kill instructions. -trim_instructions(Layout) -> +trim_recipes(Layout) -> Cost = length([I || {kill,_}=I <- Layout]), - trim_instructions_1(Layout, 0, [], {Cost,[]}). + trim_recipes_1(Layout, 0, [], {Cost,[]}). -trim_instructions_1([{kill,{y,Trim0}}|Ks], Trim0, Moves, Config0) -> +trim_recipes_1([{kill,{y,Trim0}}|Ks], Trim0, Moves, Recipes0) -> Trim = Trim0 + 1, - Config = save_config(Ks, Trim, Moves, Config0), - trim_instructions_1(Ks, Trim, Moves, Config); -trim_instructions_1([{dead,{y,Trim0}}|Ks], Trim0, Moves, Config0) -> + Recipes = save_recipe(Ks, Trim, Moves, Recipes0), + trim_recipes_1(Ks, Trim, Moves, Recipes); +trim_recipes_1([{dead,{y,Trim0}}|Ks], Trim0, Moves, Recipes0) -> Trim = Trim0 + 1, - Config = save_config(Ks, Trim, Moves, Config0), - trim_instructions_1(Ks, Trim, Moves, Config); -trim_instructions_1([{live,{y,Trim0}=Src}|Ks0], Trim0, Moves0, Config0) -> + Recipes = save_recipe(Ks, Trim, Moves, Recipes0), + trim_recipes_1(Ks, Trim, Moves, Recipes); +trim_recipes_1([{live,{y,Trim0}=Src}|Ks0], Trim0, Moves0, Recipes0) -> case take_last_dead(Ks0) of none -> - {_,ConfigList} = Config0, - ConfigList; + {_,RecipesList} = Recipes0, + RecipesList; {Dst,Ks} -> Trim = Trim0 + 1, Moves = [{move,Src,Dst}|Moves0], - Config = save_config(Ks, Trim, Moves, Config0), - trim_instructions_1(Ks, Trim, Moves, Config) + Recipes = save_recipe(Ks, Trim, Moves, Recipes0), + trim_recipes_1(Ks, Trim, Moves, Recipes) end; -trim_instructions_1([], _, _, {_,ConfigList}) -> - ConfigList. +trim_recipes_1([], _, _, {_,RecipesList}) -> + RecipesList. take_last_dead(L) -> take_last_dead_1(reverse(L)). @@ -100,28 +131,48 @@ take_last_dead_1([{dead,Reg}|Is]) -> {Reg,reverse(Is)}; take_last_dead_1(_) -> none. -save_config(Ks, Trim, Moves, {MaxCost,Acc}=Config) -> - case config_cost(Ks, Moves) of - Cost when Cost =< MaxCost -> - {MaxCost,[{Ks,Trim,Moves}|Acc]}; +save_recipe(Ks, Trim, Moves, {MaxCost,Acc}=Recipes) -> + case recipe_cost(Ks, Moves) of + Cost when Cost =< MaxCost -> + %% The price is right. + {MaxCost,[{Ks,Trim,Moves}|Acc]}; _Cost -> - Config + %% Too expensive. + Recipes end. -config_cost(Ks, Moves) -> +recipe_cost(Ks, Moves) -> %% We estimate that a {move,{y,_},{y,_}} instruction is roughly twice as %% expensive as a {kill,{y,_}} instruction. A {trim,_} instruction is %% roughly as expensive as a {kill,{y,_}} instruction. - config_cost_1(Ks, 1+2*length(Moves)). + recipe_cost_1(Ks, 1+2*length(Moves)). -config_cost_1([{kill,_}|Ks], Cost) -> - config_cost_1(Ks, Cost+1); -config_cost_1([_|Ks], Cost) -> - config_cost_1(Ks, Cost); -config_cost_1([], Cost) -> Cost. +recipe_cost_1([{kill,_}|Ks], Cost) -> + recipe_cost_1(Ks, Cost+1); +recipe_cost_1([_|Ks], Cost) -> + recipe_cost_1(Ks, Cost); +recipe_cost_1([], Cost) -> Cost. + +%% try_remap([Recipe], [Instruction], FrameSize) -> +%% {[Instruction],[TrimInstruction]}. +%% Try to renumber Y registers in the instruction stream. The +%% first rececipe that works will be used. +%% +%% This function will issue a `not_possible` exception if none +%% of the recipes were possible to apply. + +try_remap([R|Rs], Is, FrameSize) -> + {TrimInstr,Map} = expand_recipe(R, FrameSize), + try + {remap(Is, Map, []),TrimInstr} + catch + throw:not_possible -> + try_remap(Rs, Is, FrameSize) + end; +try_remap([], _, _) -> throw(not_possible). -expand_config({Layout,Trim,Moves}, FrameSize) -> +expand_recipe({Layout,Trim,Moves}, FrameSize) -> Kills = [Kill || {kill,_}=Kill <- Layout], {Kills++reverse(Moves, [{trim,Trim,FrameSize-Trim}]),create_map(Trim, Moves)}. @@ -132,16 +183,16 @@ create_map(Trim, []) -> (Any) -> Any end; create_map(Trim, Moves) -> - GbTree0 = [{Src,Dst-Trim} || {move,{y,Src},{y,Dst}} <- Moves], - GbTree = gb_trees:from_orddict(sort(GbTree0)), - IllegalTargets = gb_sets:from_list([Dst || {move,_,{y,Dst}} <- Moves]), + Map0 = [{Src,Dst-Trim} || {move,{y,Src},{y,Dst}} <- Moves], + Map = maps:from_list(Map0), + IllegalTargets = cerl_sets:from_list([Dst || {move,_,{y,Dst}} <- Moves]), fun({y,Y0}) when Y0 < Trim -> - case gb_trees:lookup(Y0, GbTree) of - {value,Y} -> {y,Y}; - none -> throw(not_possible) - end; + case Map of + #{Y0:=Y} -> {y,Y}; + #{} -> throw(not_possible) + end; ({y,Y}) -> - case gb_sets:is_element(Y, IllegalTargets) of + case cerl_sets:is_element(Y, IllegalTargets) of true -> throw(not_possible); false -> {y,Y-Trim} end; @@ -149,19 +200,15 @@ create_map(Trim, Moves) -> (Any) -> Any end. -try_remap([C|Cs], Is, FrameSize) -> - {TrimInstr,Map} = expand_config(C, FrameSize), - try - {remap(Is, Map, []),TrimInstr} - catch - throw:not_possible -> - try_remap(Cs, Is, FrameSize) - end; -try_remap([], _, _) -> throw(not_possible). - remap([{block,Bl0}|Is], Map, Acc) -> Bl = remap_block(Bl0, Map, []), remap(Is, Map, [{block,Bl}|Acc]); +remap([{bs_get_tail,Src,Dst,Live}|Is], Map, Acc) -> + I = {bs_get_tail,Map(Src),Map(Dst),Live}, + remap(Is, Map, [I|Acc]); +remap([{bs_set_position,Src1,Src2}|Is], Map, Acc) -> + I = {bs_set_position,Map(Src1),Map(Src2)}, + remap(Is, Map, [I|Acc]); remap([{call_fun,_}=I|Is], Map, Acc) -> remap(Is, Map, [I|Acc]); remap([{call,_,_}=I|Is], Map, Acc) -> @@ -205,35 +252,66 @@ remap([return|_]=Is, _, Acc) -> reverse(Acc, Is); remap([{line,_}=I|Is], Map, Acc) -> remap(Is, Map, [I|Acc]). - + remap_block([{set,Ds0,Ss0,Info}|Is], Map, Acc) -> Ds = [Map(D) || D <- Ds0], Ss = [Map(S) || S <- Ss0], remap_block(Is, Map, [{set,Ds,Ss,Info}|Acc]); remap_block([], _, Acc) -> reverse(Acc). - -safe_labels([{label,L},{line,_},{badmatch,{Tag,_}}|Is], Acc) when Tag =/= y -> - safe_labels(Is, [L|Acc]); -safe_labels([{label,L},{line,_},{case_end,{Tag,_}}|Is], Acc) when Tag =/= y -> - safe_labels(Is, [L|Acc]); -safe_labels([{label,L},{line,_},if_end|Is], Acc) -> - safe_labels(Is, [L|Acc]); -safe_labels([{label,L}, - {block,[{set,[{x,0}],[{Tag,_}],move}]}, - {line,_}, - {call_ext,1,{extfunc,erlang,error,1}}|Is], Acc) when Tag =/= y -> - safe_labels(Is, [L|Acc]); + +%% safe_labels([Instruction], Accumulator) -> gb_set() +%% Build a gb_set of safe labels. The code at a safe +%% label does not depend on the values in a specific +%% Y register, only that all Y registers are initialized +%% so that it safe to scan the stack when an exception +%% is generated. +%% +%% In other words, code at a safe label will continue +%% to work if Y registers have been renumbered and +%% the size of the stack frame has changed. + +safe_labels([{label,L}|Is], Acc) -> + case is_safe_label(Is) of + true -> safe_labels(Is, [L|Acc]); + false -> safe_labels(Is, Acc) + end; safe_labels([_|Is], Acc) -> safe_labels(Is, Acc); -safe_labels([], Acc) -> gb_sets:from_list(Acc). +safe_labels([], Acc) -> cerl_sets:from_list(Acc). + +is_safe_label([{line,_}|Is]) -> + is_safe_label(Is); +is_safe_label([{badmatch,{Tag,_}}|_]) -> + Tag =/= y; +is_safe_label([{case_end,{Tag,_}}|_]) -> + Tag =/= y; +is_safe_label([{try_case_end,{Tag,_}}|_]) -> + Tag =/= y; +is_safe_label([if_end|_]) -> + true; +is_safe_label([{block,Bl}|Is]) -> + is_safe_label_block(Bl) andalso is_safe_label(Is); +is_safe_label([{call_ext,_,{extfunc,M,F,A}}|_]) -> + erl_bifs:is_exit_bif(M, F, A); +is_safe_label(_) -> false. + +is_safe_label_block([{set,Ds,Ss,_}|Is]) -> + IsYreg = fun({y,_}) -> true; + (_) -> false + end, + %% This instruction is safe if the instruction + %% neither reads or writes Y registers. + not (any(IsYreg, Ss) orelse any(IsYreg, Ds)) andalso + is_safe_label_block(Is); +is_safe_label_block([]) -> true. %% frame_layout([Instruction], [{kill,_}], St) -> %% [{kill,Reg} | {live,Reg} | {dead,Reg}] %% Figure out the layout of the stack frame. -frame_layout(Is, Kills, #st{safe=Safe,lbl=D}) -> +frame_layout(Is, Kills, #st{safe=Safe}) -> N = frame_size(Is, Safe), - IsKilled = fun(R) -> beam_utils:is_not_used(R, Is, D) end, + IsKilled = fun(R) -> is_not_used(R, Is) end, {N,frame_layout_1(Kills, 0, N, IsKilled, [])}. frame_layout_1([{kill,{y,Y}}=I|Ks], Y, N, IsKilled, Acc) -> @@ -253,6 +331,11 @@ frame_layout_2(Is) -> reverse(Is). %% frame_size([Instruction], SafeLabels) -> FrameSize %% Find out the frame size by looking at the code that follows. +%% +%% Implicitly, also check that the instructions are a straight +%% sequence of code that ends in a return. Any branches are +%% to safe labels (i.e., the code at those labels don't depend +%% on the contents of any Y register). frame_size([{block,_}|Is], Safe) -> frame_size(Is, Safe); @@ -285,15 +368,92 @@ frame_size([{make_fun2,_,_,_,_}|Is], Safe) -> frame_size(Is, Safe); frame_size([{get_map_elements,{f,L},_,_}|Is], Safe) -> frame_size_branch(L, Is, Safe); -frame_size([{deallocate,N}|_], _) -> N; +frame_size([{deallocate,N}|_], _) -> + N; frame_size([{line,_}|Is], Safe) -> frame_size(Is, Safe); +frame_size([{bs_set_position,_,_}|Is], Safe) -> + frame_size(Is, Safe); +frame_size([{bs_get_tail,_,_,_}|Is], Safe) -> + frame_size(Is, Safe); frame_size(_, _) -> throw(not_possible). frame_size_branch(0, Is, Safe) -> frame_size(Is, Safe); frame_size_branch(L, Is, Safe) -> - case gb_sets:is_member(L, Safe) of + case cerl_sets:is_element(L, Safe) of false -> throw(not_possible); true -> frame_size(Is, Safe) end. + +%% is_not_used(Y, [Instruction]) -> true|false. +%% Test whether the value of Y is unused in the instruction sequence. +%% Return true if the value of Y is not used, and false if it is used. +%% +%% This function handles the same instructions as frame_size/2. It +%% assumes that any labels in the instructions are safe labels. + +is_not_used(Y, [{apply,_}|Is]) -> + is_not_used(Y, Is); +is_not_used(Y, [{bif,_,{f,_},Ss,Dst}|Is]) -> + is_not_used_ss_dst(Y, Ss, Dst, Is); +is_not_used(Y, [{block,Bl}|Is]) -> + case is_not_used_block(Y, Bl) of + used -> false; + killed -> true; + transparent -> is_not_used(Y, Is) + end; +is_not_used(Y, [{bs_get_tail,Src,Dst,_}|Is]) -> + is_not_used_ss_dst(Y, [Src], Dst, Is); +is_not_used(Y, [{bs_init,_,_,_,Ss,Dst}|Is]) -> + is_not_used_ss_dst(Y, Ss, Dst, Is); +is_not_used(Y, [{bs_put,{f,_},_,Ss}|Is]) -> + not member(Y, Ss) andalso is_not_used(Y, Is); +is_not_used(Y, [{bs_set_position,Src1,Src2}|Is]) -> + Y =/= Src1 andalso Y =/= Src2 andalso + is_not_used(Y, Is); +is_not_used(Y, [{call,_,_}|Is]) -> + is_not_used(Y, Is); +is_not_used(Y, [{call_ext,_,_}=I|Is]) -> + beam_jump:is_exit_instruction(I) orelse is_not_used(Y, Is); +is_not_used(Y, [{call_fun,_}|Is]) -> + is_not_used(Y, Is); +is_not_used(_Y, [{deallocate,_}|_]) -> + true; +is_not_used(Y, [{gc_bif,_,{f,_},_Live,Ss,Dst}|Is]) -> + is_not_used_ss_dst(Y, Ss, Dst, Is); +is_not_used(Y, [{get_map_elements,{f,_},S,{list,List}}|Is]) -> + {Ss,Ds} = beam_utils:split_even(List), + case member(Y, [S|Ss]) of + true -> + false; + false -> + member(Y, Ds) orelse is_not_used(Y, Is) + end; +is_not_used(Y, [{kill,Yreg}|Is]) -> + Y =:= Yreg orelse is_not_used(Y, Is); +is_not_used(Y, [{line,_}|Is]) -> + is_not_used(Y, Is); +is_not_used(Y, [{make_fun2,_,_,_,_}|Is]) -> + is_not_used(Y, Is); +is_not_used(Y, [{test,_,_,Ss}|Is]) -> + not member(Y, Ss) andalso is_not_used(Y, Is); +is_not_used(Y, [{test,_Op,{f,_},_Live,Ss,Dst}|Is]) -> + is_not_used_ss_dst(Y, Ss, Dst, Is). + +is_not_used_block(Y, [{set,Ds,Ss,_}|Is]) -> + case member(Y, Ss) of + true -> + used; + false -> + case member(Y, Ds) of + true -> + killed; + false -> + is_not_used_block(Y, Is) + end + end; +is_not_used_block(_Y, []) -> transparent. + +is_not_used_ss_dst(Y, Ss, Dst, Is) -> + not member(Y, Ss) andalso (Y =:= Dst orelse is_not_used(Y, Is)). diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index 626e041ea0..6e6574c0b3 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -18,27 +18,16 @@ %% %CopyrightEnd% %% %% Purpose : Common utilities used by several optimization passes. -%% +%% -module(beam_utils). --export([is_killed/3,is_killed_at/3,is_not_used/3, - empty_label_index/0,index_label/3,index_labels/1,replace_labels/4, - code_at/2,bif_to_test/3,is_pure_test/1, - combine_heap_needs/2, - split_even/1 - ]). +-export([replace_labels/4,is_pure_test/1,split_even/1]). -export_type([code_index/0,module_code/0,instruction/0]). --import(lists, [flatmap/2,map/2,member/2,sort/1,reverse/1]). - --define(is_const(Val), (Val =:= nil orelse - element(1, Val) =:= integer orelse - element(1, Val) =:= float orelse - element(1, Val) =:= atom orelse - element(1, Val) =:= literal)). +-import(lists, [map/2,reverse/1]). -%% instruction() describes all instructions that are used during optimzation +%% instruction() describes all instructions that are used during optimization %% (from beam_a to beam_z). -type instruction() :: atom() | tuple(). @@ -54,97 +43,6 @@ -type fail() :: beam_asm:fail() | 'fail'. -type test() :: {'test',atom(),fail(),[beam_asm:src()]} | {'test',atom(),fail(),integer(),list(),beam_asm:reg()}. --type result_cache() :: gb_trees:tree(beam_asm:label(), 'killed' | 'used'). - --record(live, - {lbl :: code_index(), %Label to code index. - res :: result_cache()}). %Result cache for each label. - -%% is_killed(Register, [Instruction], State) -> true|false -%% Determine whether a register is killed by the instruction sequence. -%% If true is returned, it means that the register will not be -%% referenced in ANY way (not even indirectly by an allocate instruction); -%% i.e. it is OK to enter the instruction sequence with Register -%% containing garbage. -%% -%% The state (constructed by index_instructions/1) is used to allow us -%% to determine the kill state across branches. - --spec is_killed(beam_asm:reg(), [instruction()], code_index()) -> boolean(). - -is_killed(R, Is, D) -> - St = #live{lbl=D,res=gb_trees:empty()}, - case check_liveness(R, Is, St) of - {killed,_} -> true; - {exit_not_used,_} -> false; - {_,_} -> false - end. - -%% is_killed_at(Reg, Lbl, State) -> true|false -%% Determine whether Reg is killed at label Lbl. - --spec is_killed_at(beam_asm:reg(), beam_asm:label(), code_index()) -> boolean(). - -is_killed_at(R, Lbl, D) when is_integer(Lbl) -> - St0 = #live{lbl=D,res=gb_trees:empty()}, - case check_liveness_at(R, Lbl, St0) of - {killed,_} -> true; - {exit_not_used,_} -> false; - {_,_} -> false - end. - -%% is_not_used(Register, [Instruction], State) -> true|false -%% Determine whether a register is never used in the instruction sequence -%% (it could still be referenced by an allocate instruction, meaning that -%% it MUST be initialized, but that its value does not matter). -%% The state is used to allow us to determine the usage state -%% across branches. - --spec is_not_used(beam_asm:reg(), [instruction()], code_index()) -> boolean(). - -is_not_used(R, Is, D) -> - St = #live{lbl=D,res=gb_trees:empty()}, - case check_liveness(R, Is, St) of - {used,_} -> false; - {exit_not_used,_} -> true; - {_,_} -> true - end. - -%% index_labels(FunctionIs) -> State -%% Index the instruction sequence so that we can quickly -%% look up the instruction following a specific label. - --spec index_labels([instruction()]) -> code_index(). - -index_labels(Is) -> - index_labels_1(Is, []). - -%% empty_label_index() -> State -%% Create an empty label index. - --spec empty_label_index() -> code_index(). - -empty_label_index() -> - gb_trees:empty(). - -%% index_label(Label, [Instruction], State) -> State -%% Add an index for a label. - --spec index_label(beam_asm:label(), [instruction()], code_index()) -> - code_index(). - -index_label(Lbl, Is0, Acc) -> - Is = drop_labels(Is0), - gb_trees:enter(Lbl, Is, Acc). - - -%% code_at(Label, State) -> [I]. -%% Retrieve the code at the given label. - --spec code_at(beam_asm:label(), code_index()) -> [instruction()]. - -code_at(L, Ll) -> - gb_trees:get(L, Ll). %% replace_labels(FunctionIs, Tail, ReplaceDb, Fallback) -> FunctionIs. %% Replace all labels in instructions according to the ReplaceDb. @@ -158,44 +56,6 @@ code_at(L, Ll) -> replace_labels(Is, Acc, D, Fb) -> replace_labels_1(Is, Acc, D, Fb). -%% bif_to_test(Bif, [Op], Fail) -> {test,Test,Fail,[Op]} -%% Convert a BIF to a test. Fail if not possible. - --spec bif_to_test(atom(), list(), fail()) -> test(). - -bif_to_test(is_atom, [_]=Ops, Fail) -> {test,is_atom,Fail,Ops}; -bif_to_test(is_boolean, [_]=Ops, Fail) -> {test,is_boolean,Fail,Ops}; -bif_to_test(is_binary, [_]=Ops, Fail) -> {test,is_binary,Fail,Ops}; -bif_to_test(is_bitstring,[_]=Ops, Fail) -> {test,is_bitstr,Fail,Ops}; -bif_to_test(is_float, [_]=Ops, Fail) -> {test,is_float,Fail,Ops}; -bif_to_test(is_function, [_]=Ops, Fail) -> {test,is_function,Fail,Ops}; -bif_to_test(is_function, [_,_]=Ops, Fail) -> {test,is_function2,Fail,Ops}; -bif_to_test(is_integer, [_]=Ops, Fail) -> {test,is_integer,Fail,Ops}; -bif_to_test(is_list, [_]=Ops, Fail) -> {test,is_list,Fail,Ops}; -bif_to_test(is_map, [_]=Ops, Fail) -> {test,is_map,Fail,Ops}; -bif_to_test(is_number, [_]=Ops, Fail) -> {test,is_number,Fail,Ops}; -bif_to_test(is_pid, [_]=Ops, Fail) -> {test,is_pid,Fail,Ops}; -bif_to_test(is_port, [_]=Ops, Fail) -> {test,is_port,Fail,Ops}; -bif_to_test(is_reference, [_]=Ops, Fail) -> {test,is_reference,Fail,Ops}; -bif_to_test(is_tuple, [_]=Ops, Fail) -> {test,is_tuple,Fail,Ops}; -bif_to_test('=<', [A,B], Fail) -> {test,is_ge,Fail,[B,A]}; -bif_to_test('>', [A,B], Fail) -> {test,is_lt,Fail,[B,A]}; -bif_to_test('<', [_,_]=Ops, Fail) -> {test,is_lt,Fail,Ops}; -bif_to_test('>=', [_,_]=Ops, Fail) -> {test,is_ge,Fail,Ops}; -bif_to_test('==', [C,A], Fail) when ?is_const(C) -> - {test,is_eq,Fail,[A,C]}; -bif_to_test('==', [_,_]=Ops, Fail) -> {test,is_eq,Fail,Ops}; -bif_to_test('/=', [C,A], Fail) when ?is_const(C) -> - {test,is_ne,Fail,[A,C]}; -bif_to_test('/=', [_,_]=Ops, Fail) -> {test,is_ne,Fail,Ops}; -bif_to_test('=:=', [C,A], Fail) when ?is_const(C) -> - {test,is_eq_exact,Fail,[A,C]}; -bif_to_test('=:=', [_,_]=Ops, Fail) -> {test,is_eq_exact,Fail,Ops}; -bif_to_test('=/=', [C,A], Fail) when ?is_const(C) -> - {test,is_ne_exact,Fail,[A,C]}; -bif_to_test('=/=', [_,_]=Ops, Fail) -> {test,is_ne_exact,Fail,Ops}. - - %% is_pure_test({test,Op,Fail,Ops}) -> true|false. %% Return 'true' if the test instruction does not modify any %% registers and/or bit syntax matching state. @@ -215,22 +75,9 @@ is_pure_test({test,test_arity,_,[_,_]}) -> true; is_pure_test({test,has_map_fields,_,[_|_]}) -> true; is_pure_test({test,is_bitstr,_,[_]}) -> true; is_pure_test({test,is_function2,_,[_,_]}) -> true; -is_pure_test({test,Op,_,Ops}) -> +is_pure_test({test,Op,_,Ops}) -> erl_internal:new_type_test(Op, length(Ops)). -%% combine_heap_needs(HeapNeed1, HeapNeed2) -> HeapNeed -%% Combine the heap need for two allocation instructions. - --type heap_need_tag() :: 'floats' | 'words'. --type heap_need() :: non_neg_integer() | - {'alloc',[{heap_need_tag(),non_neg_integer()}]}. --spec combine_heap_needs(heap_need(), heap_need()) -> heap_need(). - -combine_heap_needs(H1, H2) when is_integer(H1), is_integer(H2) -> - H1 + H2; -combine_heap_needs(H1, H2) -> - {alloc,combine_alloc_lists([H1,H2])}. - %% split_even/1 %% [1,2,3,4,5,6] -> {[1,3,5],[2,4,6]} @@ -242,438 +89,6 @@ split_even(Rs) -> split_even(Rs, [], []). %%% Local functions. %%% - -%% check_liveness(Reg, [Instruction], #live{}) -> -%% {killed | not_used | used, #live{}} -%% Find out whether Reg is used or killed in instruction sequence. -%% -%% killed - Reg is assigned or killed by an allocation instruction. -%% not_used - the value of Reg is not used, but Reg must not be garbage -%% exit_not_used - the value of Reg is not used, but must not be garbage -%% because the stack will be scanned because an -%% exit BIF will raise an exception -%% used - Reg is used - -check_liveness({fr,_}, _, St) -> - %% Conservatively always consider the floating point register used. - {used,St}; -check_liveness(R, [{block,Blk}|Is], St0) -> - case check_liveness_block(R, Blk, St0) of - {transparent,St1} -> - check_liveness(R, Is, St1); - {alloc_used,St1} -> - %% Used by an allocating instruction, but value not referenced. - %% Must check the rest of the instructions. - not_used(check_liveness(R, Is, St1)); - {Other,_}=Res when is_atom(Other) -> - Res - end; -check_liveness(R, [{label,_}|Is], St) -> - check_liveness(R, Is, St); -check_liveness(R, [{test,_,{f,Fail},As}|Is], St0) -> - case member(R, As) of - true -> - {used,St0}; - false -> - case check_liveness_at(R, Fail, St0) of - {killed,St1} -> - check_liveness(R, Is, St1); - {exit_not_used,St1} -> - not_used(check_liveness(R, Is, St1)); - {not_used,St1} -> - not_used(check_liveness(R, Is, St1)); - {used,_}=Used -> - Used - end - end; -check_liveness(R, [{test,Op,Fail,Live,Ss,Dst}|Is], St) -> - %% Check this instruction as a block to get a less conservative - %% result if the caller is is_not_used/3. - Block = [{set,[Dst],Ss,{alloc,Live,{bif,Op,Fail}}}], - check_liveness(R, [{block,Block}|Is], St); -check_liveness(R, [{select,_,R,_,_}|_], St) -> - {used,St}; -check_liveness(R, [{select,_,_,Fail,Branches}|_], St) -> - check_liveness_everywhere(R, [Fail|Branches], St); -check_liveness(R, [{jump,{f,F}}|_], St) -> - check_liveness_at(R, F, St); -check_liveness(R, [{case_end,Used}|_], St) -> - check_liveness_exit(R, Used, St); -check_liveness(R, [{try_case_end,Used}|_], St) -> - check_liveness_exit(R, Used, St); -check_liveness(R, [{badmatch,Used}|_], St) -> - check_liveness_exit(R, Used, St); -check_liveness(R, [if_end|_], St) -> - check_liveness_exit(R, ignore, St); -check_liveness(R, [{func_info,_,_,Ar}|_], St) -> - case R of - {x,X} when X < Ar -> {used,St}; - _ -> {killed,St} - end; -check_liveness(R, [{kill,R}|_], St) -> - {killed,St}; -check_liveness(R, [{kill,_}|Is], St) -> - check_liveness(R, Is, St); -check_liveness(R, [{bs_init,_,_,none,Ss,Dst}|Is], St) -> - case member(R, Ss) of - true -> - {used,St}; - false -> - if - R =:= Dst -> {killed,St}; - true -> check_liveness(R, Is, St) - end - end; -check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) -> - case R of - {x,X} -> - case member(R, Ss) of - true -> - {used,St}; - false -> - if - X < Live -> - not_used(check_liveness(R, Is, St)); - true -> - {killed,St} - end - end; - {y,_} -> - case member(R, Ss) of - true -> {used,St}; - false -> - %% If the exception is taken, the stack may - %% be scanned. Therefore the register is not - %% guaranteed to be killed. - if - R =:= Dst -> {not_used,St}; - true -> not_used(check_liveness(R, Is, St)) - end - end - end; -check_liveness(R, [{deallocate,_}|Is], St) -> - case R of - {y,_} -> {killed,St}; - _ -> check_liveness(R, Is, St) - end; -check_liveness({x,_}=R, [return|_], St) -> - case R of - {x,0} -> {used,St}; - {x,_} -> {killed,St} - end; -check_liveness(R, [{call,Live,_}|Is], St) -> - case R of - {x,X} when X < Live -> {used,St}; - {x,_} -> {killed,St}; - {y,_} -> not_used(check_liveness(R, Is, St)) - end; -check_liveness(R, [{call_ext,Live,_}=I|Is], St) -> - case R of - {x,X} when X < Live -> - {used,St}; - {x,_} -> - {killed,St}; - {y,_} -> - case beam_jump:is_exit_instruction(I) of - false -> - not_used(check_liveness(R, Is, St)); - true -> - %% We must make sure we don't check beyond this - %% instruction or we will fall through into random - %% unrelated code and get stuck in a loop. - {exit_not_used,St} - end - end; -check_liveness(R, [{call_fun,Live}|Is], St) -> - case R of - {x,X} when X =< Live -> {used,St}; - {x,_} -> {killed,St}; - {y,_} -> not_used(check_liveness(R, Is, St)) - end; -check_liveness(R, [{apply,Args}|Is], St) -> - case R of - {x,X} when X < Args+2 -> {used,St}; - {x,_} -> {killed,St}; - {y,_} -> not_used(check_liveness(R, Is, St)) - end; -check_liveness(R, [{bif,Op,Fail,Ss,D}|Is], St) -> - Set = {set,[D],Ss,{bif,Op,Fail}}, - check_liveness(R, [{block,[Set]}|Is], St); -check_liveness(R, [{gc_bif,Op,{f,Fail},Live,Ss,D}|Is], St) -> - Set = {set,[D],Ss,{alloc,Live,{gc_bif,Op,Fail}}}, - check_liveness(R, [{block,[Set]}|Is], St); -check_liveness(R, [{bs_put,{f,0},_,Ss}|Is], St) -> - case member(R, Ss) of - true -> {used,St}; - false -> check_liveness(R, Is, St) - end; -check_liveness(R, [{bs_restore2,S,_}|Is], St) -> - case R of - S -> {used,St}; - _ -> check_liveness(R, Is, St) - end; -check_liveness(R, [{bs_save2,S,_}|Is], St) -> - case R of - S -> {used,St}; - _ -> check_liveness(R, Is, St) - end; -check_liveness(R, [{move,S,D}|Is], St) -> - case R of - S -> {used,St}; - D -> {killed,St}; - _ -> check_liveness(R, Is, St) - end; -check_liveness(R, [{make_fun2,_,_,_,NumFree}|Is], St) -> - case R of - {x,X} when X < NumFree -> {used,St}; - {x,_} -> {killed,St}; - {y,_} -> not_used(check_liveness(R, Is, St)) - end; -check_liveness(R, [{'catch'=Op,Y,Fail}|Is], St) -> - Set = {set,[Y],[],{try_catch,Op,Fail}}, - check_liveness(R, [{block,[Set]}|Is], St); -check_liveness(R, [{'try'=Op,Y,Fail}|Is], St) -> - Set = {set,[Y],[],{try_catch,Op,Fail}}, - check_liveness(R, [{block,[Set]}|Is], St); -check_liveness(R, [{try_end,Y}|Is], St) -> - case R of - Y -> - {killed,St}; - {y,_} -> - %% y registers will be used if an exception occurs and - %% control transfers to the label given in the previous - %% try/2 instruction. - {used,St}; - _ -> - check_liveness(R, Is, St) - end; -check_liveness(R, [{catch_end,Y}|Is], St) -> - case R of - Y -> {killed,St}; - _ -> check_liveness(R, Is, St) - end; -check_liveness(R, [{get_tuple_element,S,_,D}|Is], St) -> - case R of - S -> {used,St}; - D -> {killed,St}; - _ -> check_liveness(R, Is, St) - end; -check_liveness(R, [{loop_rec,{f,_},{x,0}}|_], St) -> - case R of - {x,_} -> - {killed,St}; - _ -> - %% y register. Rarely happens. Be very conversative and - %% assume it's used. - {used,St} - end; -check_liveness(R, [{loop_rec_end,{f,Fail}}|_], St) -> - check_liveness_at(R, Fail, St); -check_liveness(R, [{line,_}|Is], St) -> - check_liveness(R, Is, St); -check_liveness(R, [{get_map_elements,{f,Fail},S,{list,L}}|Is], St0) -> - {Ss,Ds} = split_even(L), - case member(R, [S|Ss]) of - true -> - {used,St0}; - false -> - case check_liveness_at(R, Fail, St0) of - {killed,St}=Killed -> - case member(R, Ds) of - true -> Killed; - false -> check_liveness(R, Is, St) - end; - Other -> - Other - end - end; -check_liveness(R, [{put_map,F,Op,S,D,Live,{list,Puts}}|Is], St) -> - Set = {set,[D],[S|Puts],{alloc,Live,{put_map,Op,F}}}, - check_liveness(R, [{block,[Set]}||Is], St); -check_liveness(R, [{put_tuple,Ar,D}|Is], St) -> - Set = {set,[D],[],{put_tuple,Ar}}, - check_liveness(R, [{block,[Set]}||Is], St); -check_liveness(R, [{put_list,S1,S2,D}|Is], St) -> - Set = {set,[D],[S1,S2],put_list}, - check_liveness(R, [{block,[Set]}||Is], St); -check_liveness(R, [{test_heap,N,Live}|Is], St) -> - I = {block,[{set,[],[],{alloc,Live,{nozero,nostack,N,[]}}}]}, - check_liveness(R, [I|Is], St); -check_liveness(R, [{allocate_zero,N,Live}|Is], St) -> - I = {block,[{set,[],[],{alloc,Live,{zero,N,0,[]}}}]}, - check_liveness(R, [I|Is], St); -check_liveness(R, [{get_hd,S,D}|Is], St) -> - I = {block,[{set,[D],[S],get_hd}]}, - check_liveness(R, [I|Is], St); -check_liveness(R, [{get_tl,S,D}|Is], St) -> - I = {block,[{set,[D],[S],get_tl}]}, - check_liveness(R, [I|Is], St); -check_liveness(R, [remove_message|Is], St) -> - check_liveness(R, Is, St); -check_liveness({x,X}, [build_stacktrace|_], St) when X > 0 -> - {killed,St}; -check_liveness(R, [{recv_mark,_}|Is], St) -> - check_liveness(R, Is, St); -check_liveness(R, [{recv_set,_}|Is], St) -> - check_liveness(R, Is, St); -check_liveness(R, [{'%',_}|Is], St) -> - check_liveness(R, Is, St); -check_liveness(_R, Is, St) when is_list(Is) -> - %% Not implemented. Conservatively assume that the register is used. - {used,St}. - -check_liveness_everywhere(R, Lbls, St0) -> - check_liveness_everywhere_1(R, Lbls, killed, St0). - -check_liveness_everywhere_1(R, [{f,Lbl}|T], Res0, St0) -> - {Res1,St} = check_liveness_at(R, Lbl, St0), - Res = case Res1 of - killed -> Res0; - _ -> Res1 - end, - case Res of - used -> {used,St}; - _ -> check_liveness_everywhere_1(R, T, Res, St) - end; -check_liveness_everywhere_1(R, [_|T], Res, St) -> - check_liveness_everywhere_1(R, T, Res, St); -check_liveness_everywhere_1(_, [], Res, St) -> - {Res,St}. - -check_liveness_at(R, Lbl, #live{lbl=Ll,res=ResMemorized}=St0) -> - case gb_trees:lookup(Lbl, ResMemorized) of - {value,Res} -> - {Res,St0}; - none -> - {Res,St} = case gb_trees:lookup(Lbl, Ll) of - {value,Is} -> check_liveness(R, Is, St0); - none -> {used,St0} - end, - {Res,St#live{res=gb_trees:insert(Lbl, Res, St#live.res)}} - end. - -not_used({used,_}=Res) -> Res; -not_used({_,St}) -> {not_used,St}. - -check_liveness_exit(R, R, St) -> {used,St}; -check_liveness_exit({x,_}, _, St) -> {killed,St}; -check_liveness_exit({y,_}, _, St) -> {exit_not_used,St}. - -%% check_liveness_block(Reg, [Instruction], State) -> -%% {killed | not_used | used | alloc_used | transparent,State'} -%% Finds out how Reg is used in the instruction sequence inside a block. -%% Returns one of: -%% killed - Reg is assigned a new value or killed by an -%% allocation instruction -%% not_used - The value is not used, but the register is referenced -%% e.g. by an allocation instruction -%% transparent - Reg is neither used nor killed -%% alloc_used - Used only in an allocate instruction -%% used - Reg is explicitly used by an instruction -%% -%% Annotations are not allowed. -%% -%% (Unknown instructions will cause an exception.) - -check_liveness_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], St0) -> - if - X >= Live -> - {killed,St0}; - true -> - case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of - {transparent,St} -> {alloc_used,St}; - {_,_}=Res -> not_used(Res) - end - end; -check_liveness_block({y,_}=R, [{set,Ds,Ss,{alloc,_Live,Op}}|Is], St0) -> - case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of - {transparent,St} -> {alloc_used,St}; - {_,_}=Res -> not_used(Res) - end; -check_liveness_block({y,_}=R, [{set,Ds,Ss,{try_catch,_,Op}}|Is], St0) -> - case Ds of - [R] -> - {killed,St0}; - _ -> - case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of - {exit_not_used,St} -> - {used,St}; - {transparent,St} -> - %% Conservatively assumed that it is used. - {used,St}; - {_,_}=Res -> - Res - end - end; -check_liveness_block(R, [{set,Ds,Ss,Op}|Is], St) -> - check_liveness_block_1(R, Ss, Ds, Op, Is, St); -check_liveness_block(_, [], St) -> {transparent,St}. - -check_liveness_block_1(R, Ss, Ds, Op, Is, St0) -> - case member(R, Ss) of - true -> - {used,St0}; - false -> - case check_liveness_block_2(R, Op, Ss, St0) of - {killed,St} -> - case member(R, Ds) of - true -> {killed,St}; - false -> check_liveness_block(R, Is, St) - end; - {exit_not_used,St} -> - case member(R, Ds) of - true -> {exit_not_used,St}; - false -> check_liveness_block(R, Is, St) - end; - {not_used,St} -> - not_used(case member(R, Ds) of - true -> {killed,St}; - false -> check_liveness_block(R, Is, St) - end); - {used,St} -> - {used,St} - end - end. - -check_liveness_block_2(R, {gc_bif,Op,{f,Lbl}}, Ss, St) -> - check_liveness_block_3(R, Lbl, {Op,length(Ss)}, St); -check_liveness_block_2(R, {bif,Op,{f,Lbl}}, Ss, St) -> - Arity = length(Ss), - case erl_internal:comp_op(Op, Arity) orelse - erl_internal:new_type_test(Op, Arity) of - true -> - {killed,St}; - false -> - check_liveness_block_3(R, Lbl, {Op,length(Ss)}, St) - end; -check_liveness_block_2(R, {put_map,_Op,{f,Lbl}}, _Ss, St) -> - check_liveness_block_3(R, Lbl, {unsafe,0}, St); -check_liveness_block_2(_, _, _, St) -> - {killed,St}. - -check_liveness_block_3({x,_}, 0, _FA, St) -> - {killed,St}; -check_liveness_block_3({y,_}, 0, {F,A}, St) -> - %% If the exception is thrown, the stack may be scanned, - %% thus implicitly using the y register. - case erl_bifs:is_safe(erlang, F, A) of - true -> {killed,St}; - false -> {used,St} - end; -check_liveness_block_3(R, Lbl, _FA, St0) -> - check_liveness_at(R, Lbl, St0). - -index_labels_1([{label,Lbl}|Is0], Acc) -> - Is = drop_labels(Is0), - index_labels_1(Is0, [{Lbl,Is}|Acc]); -index_labels_1([_|Is], Acc) -> - index_labels_1(Is, Acc); -index_labels_1([], Acc) -> gb_trees:from_orddict(sort(Acc)). - -drop_labels([{label,_}|Is]) -> drop_labels(Is); -drop_labels(Is) -> Is. - - replace_labels_1([{test,Test,{f,Lbl},Ops}|Is], Acc, D, Fb) -> replace_labels_1(Is, [{test,Test,{f,label(Lbl, D, Fb)},Ops}|Acc], D, Fb); replace_labels_1([{test,Test,{f,Lbl},Live,Ops,Dst}|Is], Acc, D, Fb) -> @@ -729,21 +144,6 @@ label(Old, D, Fb) -> _ -> Fb(Old) end. -%% Help function for combine_heap_needs. - -combine_alloc_lists(Al0) -> - Al1 = flatmap(fun(Words) when is_integer(Words) -> - [{words,Words}]; - ({alloc,List}) -> - List - end, Al0), - Al2 = sofs:relation(Al1), - Al3 = sofs:relation_to_family(Al2), - Al4 = sofs:to_external(Al3), - [{Tag,lists:sum(L)} || {Tag,L} <- Al4]. - -%% live_opt/4. - split_even([], Ss, Ds) -> {reverse(Ss),reverse(Ds)}; split_even([S,D|Rs], Ss, Ds) -> diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index ca065295d6..7d908df3bf 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -479,16 +479,20 @@ valfun_1({try_case,Reg}, #vst{current=#st{ct=[Fail|Fails]}}=Vst0) -> error({bad_type,Type}) end; valfun_1({get_list,Src,D1,D2}, Vst0) -> + assert_not_literal(Src), assert_type(cons, Src, Vst0), Vst = set_type_reg(term, Src, D1, Vst0), set_type_reg(term, Src, D2, Vst); valfun_1({get_hd,Src,Dst}, Vst) -> + assert_not_literal(Src), assert_type(cons, Src, Vst), set_type_reg(term, Src, Dst, Vst); valfun_1({get_tl,Src,Dst}, Vst) -> + assert_not_literal(Src), assert_type(cons, Src, Vst), set_type_reg(term, Src, Dst, Vst); valfun_1({get_tuple_element,Src,I,Dst}, Vst) -> + assert_not_literal(Src), assert_type({tuple_element,I+1}, Src, Vst), set_type_reg(term, Src, Dst, Vst); valfun_1({jump,{f,Lbl}}, Vst) -> @@ -917,6 +921,7 @@ valfun_4(_, _) -> error(unknown_instruction). verify_get_map(Fail, Src, List, Vst0) -> + assert_not_literal(Src), %OTP 22. assert_type(map, Src, Vst0), Vst1 = foldl(fun(D, Vsti) -> case is_reg_defined(D,Vsti) of @@ -1466,6 +1471,10 @@ assert_term(Src, Vst) -> get_term_type(Src, Vst), ok. +assert_not_literal({x,_}) -> ok; +assert_not_literal({y,_}) -> ok; +assert_not_literal(Literal) -> error({literal_not_allowed,Literal}). + %% The possible types. %% %% First non-term types: diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index d894694c79..65c4f140c9 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -210,8 +210,11 @@ do_compile(Input, Opts0) -> {error,Reason} end end, - %% Dialyzer has already spawned workers. - case lists:member(dialyzer, Opts) of + %% Some tools, like Dialyzer, has already spawned workers + %% and spawning extra workers actually slow the compilation + %% down instead of speeding it up, so we provide a mechanism + %% to bypass the compiler process. + case lists:member(no_spawn_compiler_process, Opts) of true -> IntFun(); false -> @@ -823,6 +826,9 @@ kernel_passes() -> {unless,no_bsm_opt,{pass,beam_ssa_bsm}}, {iff,dssabsm,{listing,"ssabsm"}}, {iff,ssalint,{pass,beam_ssa_lint}}, + {unless,no_fun_opt,{pass,beam_ssa_funs}}, + {iff,dssafuns,{listing,"ssafuns"}}, + {iff,ssalint,{pass,beam_ssa_lint}}, {unless,no_ssa_opt,{pass,beam_ssa_opt}}, {iff,dssaopt,{listing,"ssaopt"}}, {iff,ssalint,{pass,beam_ssa_lint}}, @@ -862,7 +868,9 @@ asm_passes() -> %% need to do a few clean-ups to code. {iff,no_postopt,[{pass,beam_clean}]}, + {iff,diffable,?pass(diffable)}, {pass,beam_z}, + {iff,diffable,{listing,"S"}}, {iff,dz,{listing,"z"}}, {iff,dopt,{listing,"optimize"}}, {iff,'S',{listing,"S"}}, @@ -1006,11 +1014,17 @@ parse_module(_Code, St0) -> end. do_parse_module(DefEncoding, #compile{ifile=File,options=Opts,dir=Dir}=St) -> + SourceName0 = proplists:get_value(source, Opts, File), + SourceName = case member(deterministic, Opts) of + true -> filename:basename(SourceName0); + false -> SourceName0 + end, R = epp:parse_file(File, - [{includes,[".",Dir|inc_paths(Opts)]}, - {macros,pre_defs(Opts)}, - {default_encoding,DefEncoding}, - extra]), + [{includes,[".",Dir|inc_paths(Opts)]}, + {source_name, SourceName}, + {macros,pre_defs(Opts)}, + {default_encoding,DefEncoding}, + extra]), case R of {ok,Forms,Extra} -> Encoding = proplists:get_value(encoding, Extra), @@ -1914,6 +1928,39 @@ restore_expand_module([F|Fs]) -> [F|restore_expand_module(Fs)]; restore_expand_module([]) -> []. +%%% +%%% Transform the BEAM code to make it more friendly for +%%% diffing: using function names instead of labels for +%%% local calls and number labels relative to each function. +%%% + +diffable(Code0, St) -> + {Mod,Exp,Attr,Fs0,NumLabels} = Code0, + EntryLabels0 = [{Entry,{Name,Arity}} || + {function,Name,Arity,Entry,_} <- Fs0], + EntryLabels = maps:from_list(EntryLabels0), + Fs = [diffable_fix_function(F, EntryLabels) || F <- Fs0], + Code = {Mod,Exp,Attr,Fs,NumLabels}, + {ok,Code,St}. + +diffable_fix_function({function,Name,Arity,Entry0,Is0}, LabelMap0) -> + Entry = maps:get(Entry0, LabelMap0), + {Is1,LabelMap} = diffable_label_map(Is0, 1, LabelMap0, []), + Fb = fun(Old) -> error({no_fb,Old}) end, + Is = beam_utils:replace_labels(Is1, [], LabelMap, Fb), + {function,Name,Arity,Entry,Is}. + +diffable_label_map([{label,Old}|Is], New, Map, Acc) -> + case Map of + #{Old:=NewLabel} -> + diffable_label_map(Is, New, Map, [{label,NewLabel}|Acc]); + #{} -> + diffable_label_map(Is, New+1, Map#{Old=>New}, [{label,New}|Acc]) + end; +diffable_label_map([I|Is], New, Map, Acc) -> + diffable_label_map(Is, New, Map, [I|Acc]); +diffable_label_map([], _New, Map, Acc) -> + {Acc,Map}. -spec options() -> 'ok'. @@ -2047,6 +2094,7 @@ pre_load() -> beam_ssa_bsm, beam_ssa_codegen, beam_ssa_dead, + beam_ssa_funs, beam_ssa_opt, beam_ssa_pre_codegen, beam_ssa_recv, diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src index 7b802fdd62..86259270bd 100644 --- a/lib/compiler/src/compiler.app.src +++ b/lib/compiler/src/compiler.app.src @@ -39,6 +39,7 @@ beam_ssa_bsm, beam_ssa_codegen, beam_ssa_dead, + beam_ssa_funs, beam_ssa_lint, beam_ssa_opt, beam_ssa_pp, diff --git a/lib/compiler/src/erl_bifs.erl b/lib/compiler/src/erl_bifs.erl index 71ab0e872a..ce9762899e 100644 --- a/lib/compiler/src/erl_bifs.erl +++ b/lib/compiler/src/erl_bifs.erl @@ -108,6 +108,7 @@ is_pure(erlang, list_to_atom, 1) -> true; is_pure(erlang, list_to_binary, 1) -> true; is_pure(erlang, list_to_float, 1) -> true; is_pure(erlang, list_to_integer, 1) -> true; +is_pure(erlang, list_to_integer, 2) -> true; is_pure(erlang, list_to_pid, 1) -> true; is_pure(erlang, list_to_tuple, 1) -> true; is_pure(erlang, max, 2) -> true; |