From 60d4e673149aaa34e977e42aabeef3efd4b7c34e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 20 Sep 2018 04:23:12 +0200 Subject: Move optimization of 'move' from beam_flatten to beam_ssa_codegen The purpose of beam_flatten is to eliminate the blocks created by beam_block, but it also does a few optimizations because at the time the optimizations were added, beam_flatten was the most convenient place. Move the optimization that places `{move,Something,{x,0}}` before `call` instructions from beam_flatten to beam_ssa_codegen. This change will very slightly improve compilation times, and it will also apply the optimization in more places. In particular, a `{move,Literal,{x,0}}` would never be moved passed a `trim` instruction before this change. Now it will. --- lib/compiler/src/beam_flatten.erl | 33 +---------------- lib/compiler/src/beam_ssa_codegen.erl | 68 +++++++++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 34 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl index 973d16a1bc..c8bc1b7b84 100644 --- a/lib/compiler/src/beam_flatten.erl +++ b/lib/compiler/src/beam_flatten.erl @@ -32,8 +32,7 @@ module({Mod,Exp,Attr,Fs,Lc}, _Opt) -> {ok,{Mod,Exp,Attr,[function(F) || F <- Fs],Lc}}. function({function,Name,Arity,CLabel,Is0}) -> - Is1 = block(Is0), - Is = opt(Is1), + Is = block(Is0), {function,Name,Arity,CLabel,Is}. block(Is) -> @@ -115,33 +114,3 @@ insert_alloc_1([{bs_init=Op,Fail,Info0,Live,Ss,Dst}|Is], reverse(Acc, [I|Is]); insert_alloc_1([{bs_put,_,_,_}=I|Is], Alloc, Acc) -> insert_alloc_1(Is, Alloc, [I|Acc]). - -%% opt(Is0) -> Is -%% Simple peep-hole optimization to move a {move,Any,{x,0}} past -%% any kill up to the next call instruction. (To give the loader -%% an opportunity to combine the 'move' and the 'call' instructions.) -%% -opt(Is) -> - opt_1(Is, []). - -opt_1([{move,_,{x,0}}=I|Is0], Acc0) -> - case move_past_kill(Is0, I, Acc0) of - impossible -> opt_1(Is0, [I|Acc0]); - {Is,Acc} -> opt_1(Is, Acc) - end; -opt_1([I|Is], Acc) -> - opt_1(Is, [I|Acc]); -opt_1([], Acc) -> reverse(Acc). - -move_past_kill([{kill,Src}|_], {move,Src,_}, _) -> - impossible; -move_past_kill([{kill,_}=I|Is], Move, Acc) -> - move_past_kill(Is, Move, [I|Acc]); -move_past_kill([{trim,N,_}=I|Is], {move,Src,Dst}=Move, Acc) -> - case Src of - {y,Y} when Y < N-> impossible; - {y,Y} -> {Is,[{move,{y,Y-N},Dst},I|Acc]}; - _ -> {Is,[Move,I|Acc]} - end; -move_past_kill(Is, Move, Acc) -> - {Is,[Move|Acc]}. diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl index 5085c950b5..cae9920f40 100644 --- a/lib/compiler/src/beam_ssa_codegen.erl +++ b/lib/compiler/src/beam_ssa_codegen.erl @@ -1196,11 +1196,40 @@ bif_to_test(Name, Args, Fail) -> opt_call_moves(Is0, Arity) -> {Moves0,Is} = splitwith(fun({move,_,_}) -> true; + ({kill,_}) -> true; (_) -> false end, Is0), Moves = opt_call_moves_1(Moves0, Arity), Moves ++ Is. +opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1|[{kill,_}|_]=Is], Arity) -> + %% There could be a {move,Tmp,{x,0}} instruction after the + %% kill/1 instructions (moved to there by opt_move_to_x0/1). + case splitwith(fun({kill,_}) -> true; + (_) -> false + end, Is) of + {Kills,[{move,{x,_}=Tmp,{x,0}}=M2]} -> + %% The two move/2 instructions (M1 and M2) can be combined + %% to one. The question is, though, is it safe to place + %% them after the kill/1 instructions? + case is_killed(Src, Kills, Arity) of + true -> + %% Src (a Y register) is killed by one of the + %% kill/1 instructions. Thus M1 and M2 + %% must be placed before the kill/1 instructions + %% (essentially undoing what opt_move_to_x0/1 + %% did, which turned out to be a pessimization + %% in this case). + opt_call_moves_1([M1,M2|Kills], Arity); + false -> + %% Src is not killed by any of the kill/1 + %% instructions. Thus it is safe to place + %% M1 and M2 after the kill/1 instructions. + opt_call_moves_1(Kills++[M1,M2], Arity) + end; + {_,_} -> + [M1|Is] + end; opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1,{move,Tmp,Dst}=M2|Is], Arity) -> case is_killed(Tmp, Is, Arity) of true -> @@ -1214,6 +1243,10 @@ opt_call_moves_1([M|Ms], Arity) -> [M|opt_call_moves_1(Ms, Arity)]; opt_call_moves_1([], _Arity) -> []. +is_killed(Y, [{kill,Y}|_], _) -> + true; +is_killed(R, [{kill,_}|Is], Arity) -> + is_killed(R, Is, Arity); is_killed(R, [{move,R,_}|_], _) -> false; is_killed(R, [{move,_,R}|_], _) -> @@ -1221,7 +1254,9 @@ is_killed(R, [{move,_,R}|_], _) -> is_killed(R, [{move,_,_}|Is], Arity) -> is_killed(R, Is, Arity); is_killed({x,X}, [], Arity) -> - X >= Arity. + X >= Arity; +is_killed({y,_}, [], _) -> + false. cg_alloc(#cg_alloc{stack=none,words=#need{h=0,f=0}}, _St) -> []; @@ -1621,12 +1656,41 @@ phi_copies([#b_set{dst=Dst,args=PhiArgs}|Sets], L) -> [#cg_set{op=copy,dst=Dst,args=CopyArgs}|phi_copies(Sets, L)]; phi_copies([], _) -> []. +%% opt_move_to_x0([Instruction]) -> [Instruction]. +%% Simple peep-hole optimization to move a {move,Any,{x,0}} past +%% any kill up to the next call instruction. (To give the loader +%% an opportunity to combine the 'move' and the 'call' instructions.) + +opt_move_to_x0(Moves) -> + opt_move_to_x0(Moves, []). + +opt_move_to_x0([{move,_,{x,0}}=I|Is0], Acc0) -> + case move_past_kill(Is0, I, Acc0) of + impossible -> opt_move_to_x0(Is0, [I|Acc0]); + {Is,Acc} -> opt_move_to_x0(Is, Acc) + end; +opt_move_to_x0([I|Is], Acc) -> + opt_move_to_x0(Is, [I|Acc]); +opt_move_to_x0([], Acc) -> reverse(Acc). + +move_past_kill([{kill,Src}|_], {move,Src,_}, _) -> + impossible; +move_past_kill([{kill,_}=I|Is], Move, Acc) -> + move_past_kill(Is, Move, [I|Acc]); +move_past_kill(Is, Move, Acc) -> + {Is,[Move|Acc]}. + %% setup_args(Args, Anno, Context) -> [Instruction]. %% setup_args(Args) -> [Instruction]. %% Set up X registers for a call. setup_args(Args, Anno, none, St) -> - setup_args(Args) ++ kill_yregs(Anno, St); + case {setup_args(Args),kill_yregs(Anno, St)} of + {Moves,[]} -> + Moves; + {Moves,Kills} -> + opt_move_to_x0(Moves ++ Kills) + end; setup_args(Args, _, _, _) -> setup_args(Args). -- cgit v1.2.3 From af632b4a9f259f5b8779d23d7aea6ebab7196724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 20 Sep 2018 07:57:19 +0200 Subject: Move allocation combining from beam_flatten to beam_ssa_codegen Continuing the simplification of beam_flatten, move the optimization that eliminates a test_heap instruction following a binary construction by incorporating the allocation of the heap space into the bs_init* instruction itself. This change does not change the generated code in any way. Also remove beam_utils:combine_heap_needs/2, because beam_flatten was the last user of it. --- lib/compiler/src/beam_flatten.erl | 33 +------------------------- lib/compiler/src/beam_ssa_codegen.erl | 44 ++++++++++++++++++++++++++++------- lib/compiler/src/beam_utils.erl | 32 ++----------------------- 3 files changed, 39 insertions(+), 70 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl index c8bc1b7b84..07c24abf7b 100644 --- a/lib/compiler/src/beam_flatten.erl +++ b/lib/compiler/src/beam_flatten.erl @@ -42,13 +42,6 @@ block([{block,Is0}|Is1], Acc) -> block(Is1, norm_block(Is0, Acc)); block([I|Is], Acc) -> block(Is, [I|Acc]); block([], Acc) -> reverse(Acc). -norm_block([{set,[],[],{alloc,R,{_,nostack,_,_}=Alloc}}|Is], Acc0) -> - case insert_alloc_in_bs_init(Acc0, Alloc) of - impossible -> - norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0)); - Acc -> - norm_block(Is, Acc) - end; norm_block([{set,[],[],{alloc,R,Alloc}}|Is], Acc0) -> norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0)); norm_block([{set,[D1],[S],get_hd},{set,[D2],[S],get_tl}|Is], Acc) -> @@ -56,7 +49,7 @@ norm_block([{set,[D1],[S],get_hd},{set,[D2],[S],get_tl}|Is], Acc) -> norm_block(Is, [I|Acc]); norm_block([I|Is], Acc) -> norm_block(Is, [norm(I)|Acc]); norm_block([], Acc) -> Acc. - + norm({set,[D],As,{bif,N,F}}) -> {bif,N,F,As,D}; norm({set,[D],As,{alloc,R,{gc_bif,N,F}}}) -> {gc_bif,N,F,R,As,D}; norm({set,[D],[],init}) -> {init,D}; @@ -90,27 +83,3 @@ norm_allocate({nozero,Ns,0,Inits}, Regs) -> [{allocate,Ns,Regs}|Inits]; norm_allocate({nozero,Ns,Nh,Inits}, Regs) -> [{allocate_heap,Ns,Nh,Regs}|Inits]. - -%% insert_alloc_in_bs_init(ReverseInstructionStream, AllocationInfo) -> -%% impossible | ReverseInstructionStream' -%% A bs_init/6 instruction should not be followed by a test heap instruction. -%% Given the AllocationInfo from a test heap instruction, merge the -%% allocation amounts into the previous bs_init/6 instruction (if any). -%% -insert_alloc_in_bs_init([{bs_put,_,_,_}=I|Is], Alloc) -> - %% The instruction sequence ends with an bs_put/4 instruction. - %% We'll need to search backwards for the bs_init/6 instruction. - insert_alloc_1(Is, Alloc, [I]); -insert_alloc_in_bs_init(_, _) -> impossible. - -insert_alloc_1([{bs_init=Op,Fail,Info0,Live,Ss,Dst}|Is], - {_,nostack,Ws2,[]}, Acc) when is_integer(Live) -> - %% The number of extra heap words is always in the second position - %% in the Info tuple. - Ws1 = element(2, Info0), - Al = beam_utils:combine_heap_needs(Ws1, Ws2), - Info = setelement(2, Info0, Al), - I = {Op,Fail,Info,Live,Ss,Dst}, - reverse(Acc, [I|Is]); -insert_alloc_1([{bs_put,_,_,_}=I|Is], Alloc, Acc) -> - insert_alloc_1(Is, Alloc, [I|Acc]). diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl index cae9920f40..142df5192f 100644 --- a/lib/compiler/src/beam_ssa_codegen.erl +++ b/lib/compiler/src/beam_ssa_codegen.erl @@ -218,7 +218,7 @@ need_heap_never(_) -> false. need_heap_blks([{L,#cg_blk{is=Is0}=Blk0}|Bs], H0, Acc) -> {Is1,H1} = need_heap_is(reverse(Is0), H0, []), - {Ns,H} = need_heap_terminator(Bs, H1), + {Ns,H} = need_heap_terminator(Bs, L, H1), Is = Ns ++ Is1, Blk = Blk0#cg_blk{is=Is}, need_heap_blks(Bs, H, [{L,Blk}|Acc]); @@ -228,6 +228,13 @@ need_heap_blks([], H, Acc) -> need_heap_is([#cg_alloc{words=Words}=Alloc0|Is], N, Acc) -> Alloc = Alloc0#cg_alloc{words=add_heap_words(N, Words)}, need_heap_is(Is, #need{}, [Alloc|Acc]); +need_heap_is([#cg_set{anno=Anno,op=bs_init}=I0|Is], N, Acc) -> + Alloc = case need_heap_need(N) of + [#cg_alloc{words=Need}] -> alloc(Need); + [] -> 0 + end, + I = I0#cg_set{anno=Anno#{alloc=>Alloc}}, + need_heap_is(Is, #need{}, [I|Acc]); need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) -> case classify_heap_need(Op, Args) of {put,Words} -> @@ -243,11 +250,31 @@ need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) -> need_heap_is([], N, Acc) -> {Acc,N}. -need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=Same,fail=Same}}}|_], N) -> +need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=L,fail=L}}}|_], L, N) -> + %% Fallthrough. {[],N}; -need_heap_terminator([{_,#cg_blk{}}|_], N) -> +need_heap_terminator([{_,#cg_blk{is=Is,last=#cg_br{succ=L}}}|_], L, N) -> + case need_heap_need(N) of + [] -> + {[],#need{}}; + [_|_]=Alloc -> + %% If the preceding instructions are a binary construction, + %% hoist the allocation and incorporate into the bs_init + %% instruction. + case reverse(Is) of + [#cg_set{op=succeeded},#cg_set{op=bs_init}|_] -> + {[],N}; + [#cg_set{op=bs_put}|_] -> + {[],N}; + _ -> + %% Not binary construction. Must emit an allocation + %% instruction in this block. + {Alloc,#need{}} + end + end; +need_heap_terminator([{_,#cg_blk{}}|_], _, N) -> {need_heap_need(N),#need{}}; -need_heap_terminator([], H) -> +need_heap_terminator([], _, H) -> {need_heap_need(H),#need{}}. need_heap_need(#need{h=0,f=0}) -> []; @@ -1022,12 +1049,13 @@ cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I, #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) -> Fail = bif_fail(Fail0), Line = line(Anno), + Alloc = map_get(alloc, Anno), [#b_literal{val=Kind}|Args1] = Args0, case Kind of new -> [Dst,Size,{integer,Unit}] = beam_args([Dst0|Args1], St), Live = get_live(I), - {[Line|cg_bs_init(Dst, Size, Unit, Live, Fail)],St}; + {[Line|cg_bs_init(Dst, Size, Alloc, Unit, Live, Fail)],St}; private_append -> [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St), Flags = {field_flags,[]}, @@ -1037,7 +1065,7 @@ cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I, [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St), Flags = {field_flags,[]}, Live = get_live(I), - Is = [Line,{bs_append,Fail,Bits,0,Live,Unit,Src,Flags,Dst}], + Is = [Line,{bs_append,Fail,Bits,Alloc,Live,Unit,Src,Flags,Dst}], {Is,St} end; cg_block([#cg_set{anno=Anno,op=bs_start_match,dst=Ctx0,args=[Bin0]}=I, @@ -1531,13 +1559,13 @@ cg_bs_put(Fail, [{atom,Type},{literal,Flags}|Args]) -> [{Op,Fail,{field_flags,Flags},Src}] end. -cg_bs_init(Dst, Size0, Unit, Live, Fail) -> +cg_bs_init(Dst, Size0, Alloc, Unit, Live, Fail) -> Op = case Unit of 1 -> bs_init_bits; 8 -> bs_init2 end, Size = cg_bs_init_size(Size0), - [{Op,Fail,Size,0,Live,{field_flags,[]},Dst}]. + [{Op,Fail,Size,Alloc,Live,{field_flags,[]},Dst}]. cg_bs_init_size({x,_}=R) -> R; cg_bs_init_size({y,_}=R) -> R; diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index 686d314c2d..39916a2af8 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -24,13 +24,11 @@ -export([is_killed/3,is_killed_at/3,is_not_used/3, empty_label_index/0,index_label/3,index_labels/1,replace_labels/4, code_at/2,bif_to_test/3,is_pure_test/1, - combine_heap_needs/2, - split_even/1 - ]). + split_even/1]). -export_type([code_index/0,module_code/0,instruction/0]). --import(lists, [flatmap/2,map/2,member/2,sort/1,reverse/1]). +-import(lists, [map/2,member/2,sort/1,reverse/1]). -define(is_const(Val), (Val =:= nil orelse element(1, Val) =:= integer orelse @@ -218,19 +216,6 @@ is_pure_test({test,is_function2,_,[_,_]}) -> true; is_pure_test({test,Op,_,Ops}) -> erl_internal:new_type_test(Op, length(Ops)). -%% combine_heap_needs(HeapNeed1, HeapNeed2) -> HeapNeed -%% Combine the heap need for two allocation instructions. - --type heap_need_tag() :: 'floats' | 'words'. --type heap_need() :: non_neg_integer() | - {'alloc',[{heap_need_tag(),non_neg_integer()}]}. --spec combine_heap_needs(heap_need(), heap_need()) -> heap_need(). - -combine_heap_needs(H1, H2) when is_integer(H1), is_integer(H2) -> - H1 + H2; -combine_heap_needs(H1, H2) -> - {alloc,combine_alloc_lists([H1,H2])}. - %% split_even/1 %% [1,2,3,4,5,6] -> {[1,3,5],[2,4,6]} @@ -734,19 +719,6 @@ label(Old, D, Fb) -> _ -> Fb(Old) end. -%% Help function for combine_heap_needs. - -combine_alloc_lists(Al0) -> - Al1 = flatmap(fun(Words) when is_integer(Words) -> - [{words,Words}]; - ({alloc,List}) -> - List - end, Al0), - Al2 = sofs:relation(Al1), - Al3 = sofs:relation_to_family(Al2), - Al4 = sofs:to_external(Al3), - [{Tag,lists:sum(L)} || {Tag,L} <- Al4]. - %% live_opt/4. split_even([], Ss, Ds) -> -- cgit v1.2.3 From cd1357467759401fbc87d6b39045adbcaf5016d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 20 Sep 2018 08:05:45 +0200 Subject: Remove the last optimization from beam_flatten It is not necessary to combine get_hd and get_tl instruction to a get_list instruction. It will be done in beam_z. After this change, beam_flatten does nothing more than eliminating the blocks. --- lib/compiler/src/beam_flatten.erl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl index 07c24abf7b..3e6bc1b1ed 100644 --- a/lib/compiler/src/beam_flatten.erl +++ b/lib/compiler/src/beam_flatten.erl @@ -44,10 +44,8 @@ block([], Acc) -> reverse(Acc). norm_block([{set,[],[],{alloc,R,Alloc}}|Is], Acc0) -> norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0)); -norm_block([{set,[D1],[S],get_hd},{set,[D2],[S],get_tl}|Is], Acc) -> - I = {get_list,S,D1,D2}, - norm_block(Is, [I|Acc]); -norm_block([I|Is], Acc) -> norm_block(Is, [norm(I)|Acc]); +norm_block([I|Is], Acc) -> + norm_block(Is, [norm(I)|Acc]); norm_block([], Acc) -> Acc. norm({set,[D],As,{bif,N,F}}) -> {bif,N,F,As,D}; -- cgit v1.2.3 From 3954ff115258edd18acddd77c526da4f5cb308ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 20 Sep 2018 08:17:03 +0200 Subject: beam_clean: Use maps and cerl_sets instead of dict and sets Using maps and cerl_sets instead of dict and sets will slightly speed up the beam_clean pass for modules with many functions and/or calls to local functions. --- lib/compiler/src/beam_clean.erl | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl index f5f0ac2218..7299654476 100644 --- a/lib/compiler/src/beam_clean.erl +++ b/lib/compiler/src/beam_clean.erl @@ -23,17 +23,15 @@ -export([module/2]). -export([clean_labels/1]). --import(lists, [foldl/3]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. module({Mod,Exp,Attr,Fs0,_}, Opts) -> Order = [Lbl || {function,_,_,Lbl,_} <- Fs0], - All = foldl(fun({function,_,_,Lbl,_}=Func,D) -> dict:store(Lbl, Func, D) end, - dict:new(), Fs0), + All = maps:from_list([{Lbl,Func} || {function,_,_,Lbl,_}=Func <- Fs0]), WorkList = rootset(Fs0, Exp, Attr), - Used = find_all_used(WorkList, All, sets:from_list(WorkList)), + Used = find_all_used(WorkList, All, cerl_sets:from_list(WorkList)), Fs1 = remove_unused(Order, Used, All), {Fs2,Lc} = clean_labels(Fs1), Fs = maybe_remove_lines(Fs2, Opts), @@ -55,16 +53,16 @@ rootset(Fs, Root0, Attr) -> %% Remove the unused functions. remove_unused([F|Fs], Used, All) -> - case sets:is_element(F, Used) of + case cerl_sets:is_element(F, Used) of false -> remove_unused(Fs, Used, All); - true -> [dict:fetch(F, All)|remove_unused(Fs, Used, All)] + true -> [map_get(F, All)|remove_unused(Fs, Used, All)] end; remove_unused([], _, _) -> []. - + %% Find all used functions. find_all_used([F|Fs0], All, Used0) -> - {function,_,_,_,Code} = dict:fetch(F, All), + {function,_,_,_,Code} = map_get(F, All), {Fs,Used} = update_work_list(Code, {Fs0,Used0}), find_all_used(Fs, All, Used); find_all_used([], _All, Used) -> Used. @@ -78,9 +76,9 @@ update_work_list([_|Is], Sets) -> update_work_list([], Sets) -> Sets. add_to_work_list(F, {Fs,Used}=Sets) -> - case sets:is_element(F, Used) of + case cerl_sets:is_element(F, Used) of true -> Sets; - false -> {[F|Fs],sets:add_element(F, Used)} + false -> {[F|Fs],cerl_sets:add_element(F, Used)} end. -- cgit v1.2.3 From 8e12d5652d6bb12f18d8a1fe3a1cc0b1ebed4111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sun, 23 Sep 2018 13:13:19 +0200 Subject: beam_utils: Fix typo in comment --- lib/compiler/src/beam_utils.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index 39916a2af8..65bfb9959a 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -36,7 +36,7 @@ element(1, Val) =:= atom orelse element(1, Val) =:= literal)). -%% instruction() describes all instructions that are used during optimzation +%% instruction() describes all instructions that are used during optimization %% (from beam_a to beam_z). -type instruction() :: atom() | tuple(). -- cgit v1.2.3 From 7318da161d5ed7987c6e524e670219337160abc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 24 Sep 2018 12:29:09 +0200 Subject: beam_ssa_pre_codegen: Fix bug in sanitization of get_map_element The source map for `get_map_element` is not supposed to be a literal, and the sanitization code is supposed to ensure that. The sanitization incorrectly translated this code: Map = get_tuple_element literal {ok,#{key=>value}}, literal 1 Var = get_map_element Map, literal {a,key} To this code: Var = get_map_element literal #{key=>value}, literal {a,key} Make sure to substitute the arguments for `get_map_element` before looking for a literal source map. --- lib/compiler/src/beam_ssa_pre_codegen.erl | 43 ++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index ca3b792ed6..811f3d79e5 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -574,21 +574,24 @@ sanitize([], Count, Blocks0, Values) -> false -> remove_unreachable(Ls, Blocks, Reachable, []) end,Count}. -sanitize_is([#b_set{op=get_map_element, - args=[#b_literal{}=Map,Key]}=I0|Is], - Count0, Values, _Changed, Acc) -> - {MapVar,Count} = new_var('@ssa_map', Count0), - I = I0#b_set{args=[MapVar,Key]}, - Copy = #b_set{op=copy,dst=MapVar,args=[Map]}, - sanitize_is(Is, Count, Values, true, [I,Copy|Acc]); +sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is], + Count0, Values, Changed, Acc) -> + case sanitize_args(Args0, Values) of + [#b_literal{}=Map,Key] -> + %% Bind the literal map to a variable. + {MapVar,Count} = new_var('@ssa_map', Count0), + I = I0#b_set{args=[MapVar,Key]}, + Copy = #b_set{op=copy,dst=MapVar,args=[Map]}, + sanitize_is(Is, Count, Values, true, [I,Copy|Acc]); + [_,_]=Args0 -> + sanitize_is(Is, Count0, Values, Changed, [I0|Acc]); + [_,_]=Args -> + I = I0#b_set{args=Args}, + sanitize_is(Is, Count0, Values, Changed, [I|Acc]) + end; sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0], - Count, Values, Changed, Acc) -> - Args = map(fun(Var) -> - case Values of - #{Var:=New} -> New; - #{} -> Var - end - end, Args0), + Count, Values, Changed0, Acc) -> + Args = sanitize_args(Args0, Values), case sanitize_instr(Op, Args, I0) of {value,Value0} -> Value = #b_literal{val=Value0}, @@ -596,7 +599,9 @@ sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0], {ok,I} -> sanitize_is(Is0, Count, Values, true, [I|Acc]); ok -> - sanitize_is(Is0, Count, Values, Changed, [I0|Acc]) + I = I0#b_set{args=Args}, + Changed = Changed0 orelse Args =/= Args0, + sanitize_is(Is0, Count, Values, Changed, [I|Acc]) end; sanitize_is([], Count, Values, Changed, Acc) -> case Changed of @@ -606,6 +611,14 @@ sanitize_is([], Count, Values, Changed, Acc) -> no_change end. +sanitize_args(Args, Values) -> + map(fun(Var) -> + case Values of + #{Var:=New} -> New; + #{} -> Var + end + end, Args). + sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) -> case erl_bifs:is_pure(erlang, Bif, 1) of false -> -- cgit v1.2.3 From 78a6f7fc67991e61d6f62c4056e11c22b7b1387b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 24 Sep 2018 12:39:25 +0200 Subject: beam_validator: Disallow literal arguments for certain instructions Disallow a literal map source for get_map_elements. There is currently runtime support for get_map with a literal map source, but by forbidding it in OTP 22, the runtime support could be removed in a future release (perhaps OTP 24). Also verify that the source arguments for get_list, get_hd, get_tl, and get_tuple_element are not literals. Literals are not supported for those instructions in the runtime system; verifying it in beam_validator is a convenience so that this kind of bug will be detected already during compilation. --- lib/compiler/src/beam_validator.erl | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index e76b097500..68544d97ee 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -459,16 +459,20 @@ valfun_1({try_case,Reg}, #vst{current=#st{ct=[Fail|Fails]}}=Vst0) -> error({bad_type,Type}) end; valfun_1({get_list,Src,D1,D2}, Vst0) -> + assert_not_literal(Src), assert_type(cons, Src, Vst0), Vst = set_type_reg(term, Src, D1, Vst0), set_type_reg(term, Src, D2, Vst); valfun_1({get_hd,Src,Dst}, Vst) -> + assert_not_literal(Src), assert_type(cons, Src, Vst), set_type_reg(term, Src, Dst, Vst); valfun_1({get_tl,Src,Dst}, Vst) -> + assert_not_literal(Src), assert_type(cons, Src, Vst), set_type_reg(term, Src, Dst, Vst); valfun_1({get_tuple_element,Src,I,Dst}, Vst) -> + assert_not_literal(Src), assert_type({tuple_element,I+1}, Src, Vst), set_type_reg(term, Src, Dst, Vst); valfun_1({jump,{f,Lbl}}, Vst) -> @@ -875,6 +879,7 @@ valfun_4(_, _) -> error(unknown_instruction). verify_get_map(Fail, Src, List, Vst0) -> + assert_not_literal(Src), %OTP 22. assert_type(map, Src, Vst0), Vst1 = foldl(fun(D, Vsti) -> case is_reg_defined(D,Vsti) of @@ -1427,6 +1432,10 @@ assert_term(Src, Vst) -> get_term_type(Src, Vst), ok. +assert_not_literal({x,_}) -> ok; +assert_not_literal({y,_}) -> ok; +assert_not_literal(Literal) -> error({literal_not_allowed,Literal}). + %% The possible types. %% %% First non-term types: -- cgit v1.2.3 From 13dd57dd63fee7593f809f1fc77ec91e4646c90c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 24 Sep 2018 15:23:40 +0200 Subject: Move bif_to_test/3 from beam_utils to beam_ssa_codegen The only caller of bif_to_test/3 is beam_ssa_codegen. --- lib/compiler/src/beam_ssa_codegen.erl | 63 ++++++++++++++++++++++++++++++++++- lib/compiler/src/beam_utils.erl | 40 +--------------------- 2 files changed, 63 insertions(+), 40 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl index 142df5192f..163553d2c9 100644 --- a/lib/compiler/src/beam_ssa_codegen.erl +++ b/lib/compiler/src/beam_ssa_codegen.erl @@ -1207,6 +1207,12 @@ cg_copy_1([#cg_set{dst=Dst0,args=Args}|T], St) -> end; cg_copy_1([], _St) -> []. +-define(IS_LITERAL(Val), (Val =:= nil orelse + element(1, Val) =:= integer orelse + element(1, Val) =:= float orelse + element(1, Val) =:= atom orelse + element(1, Val) =:= literal)). + bif_to_test('and', [V1,V2], Fail) -> [{test,is_eq_exact,Fail,[V1,{atom,true}]}, {test,is_eq_exact,Fail,[V2,{atom,true}]}]; @@ -1220,7 +1226,62 @@ bif_to_test('or', [V1,V2], {f,Lbl}=Fail) when Lbl =/= 0 -> bif_to_test('not', [Var], Fail) -> [{test,is_eq_exact,Fail,[Var,{atom,false}]}]; bif_to_test(Name, Args, Fail) -> - [beam_utils:bif_to_test(Name, Args, Fail)]. + [bif_to_test_1(Name, Args, Fail)]. + +bif_to_test_1(is_atom, [_]=Ops, Fail) -> + {test,is_atom,Fail,Ops}; +bif_to_test_1(is_boolean, [_]=Ops, Fail) -> + {test,is_boolean,Fail,Ops}; +bif_to_test_1(is_binary, [_]=Ops, Fail) -> + {test,is_binary,Fail,Ops}; +bif_to_test_1(is_bitstring,[_]=Ops, Fail) -> + {test,is_bitstr,Fail,Ops}; +bif_to_test_1(is_float, [_]=Ops, Fail) -> + {test,is_float,Fail,Ops}; +bif_to_test_1(is_function, [_]=Ops, Fail) -> + {test,is_function,Fail,Ops}; +bif_to_test_1(is_function, [_,_]=Ops, Fail) -> + {test,is_function2,Fail,Ops}; +bif_to_test_1(is_integer, [_]=Ops, Fail) -> + {test,is_integer,Fail,Ops}; +bif_to_test_1(is_list, [_]=Ops, Fail) -> + {test,is_list,Fail,Ops}; +bif_to_test_1(is_map, [_]=Ops, Fail) -> + {test,is_map,Fail,Ops}; +bif_to_test_1(is_number, [_]=Ops, Fail) -> + {test,is_number,Fail,Ops}; +bif_to_test_1(is_pid, [_]=Ops, Fail) -> + {test,is_pid,Fail,Ops}; +bif_to_test_1(is_port, [_]=Ops, Fail) -> + {test,is_port,Fail,Ops}; +bif_to_test_1(is_reference, [_]=Ops, Fail) -> + {test,is_reference,Fail,Ops}; +bif_to_test_1(is_tuple, [_]=Ops, Fail) -> + {test,is_tuple,Fail,Ops}; +bif_to_test_1('=<', [A,B], Fail) -> + {test,is_ge,Fail,[B,A]}; +bif_to_test_1('>', [A,B], Fail) -> + {test,is_lt,Fail,[B,A]}; +bif_to_test_1('<', [_,_]=Ops, Fail) -> + {test,is_lt,Fail,Ops}; +bif_to_test_1('>=', [_,_]=Ops, Fail) -> + {test,is_ge,Fail,Ops}; +bif_to_test_1('==', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_eq,Fail,[A,C]}; +bif_to_test_1('==', [_,_]=Ops, Fail) -> + {test,is_eq,Fail,Ops}; +bif_to_test_1('/=', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_ne,Fail,[A,C]}; +bif_to_test_1('/=', [_,_]=Ops, Fail) -> + {test,is_ne,Fail,Ops}; +bif_to_test_1('=:=', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_eq_exact,Fail,[A,C]}; +bif_to_test_1('=:=', [_,_]=Ops, Fail) -> + {test,is_eq_exact,Fail,Ops}; +bif_to_test_1('=/=', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_ne_exact,Fail,[A,C]}; +bif_to_test_1('=/=', [_,_]=Ops, Fail) -> + {test,is_ne_exact,Fail,Ops}. opt_call_moves(Is0, Arity) -> {Moves0,Is} = splitwith(fun({move,_,_}) -> true; diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index 65bfb9959a..dc724c880b 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -23,7 +23,7 @@ -module(beam_utils). -export([is_killed/3,is_killed_at/3,is_not_used/3, empty_label_index/0,index_label/3,index_labels/1,replace_labels/4, - code_at/2,bif_to_test/3,is_pure_test/1, + code_at/2,is_pure_test/1, split_even/1]). -export_type([code_index/0,module_code/0,instruction/0]). @@ -156,44 +156,6 @@ code_at(L, Ll) -> replace_labels(Is, Acc, D, Fb) -> replace_labels_1(Is, Acc, D, Fb). -%% bif_to_test(Bif, [Op], Fail) -> {test,Test,Fail,[Op]} -%% Convert a BIF to a test. Fail if not possible. - --spec bif_to_test(atom(), list(), fail()) -> test(). - -bif_to_test(is_atom, [_]=Ops, Fail) -> {test,is_atom,Fail,Ops}; -bif_to_test(is_boolean, [_]=Ops, Fail) -> {test,is_boolean,Fail,Ops}; -bif_to_test(is_binary, [_]=Ops, Fail) -> {test,is_binary,Fail,Ops}; -bif_to_test(is_bitstring,[_]=Ops, Fail) -> {test,is_bitstr,Fail,Ops}; -bif_to_test(is_float, [_]=Ops, Fail) -> {test,is_float,Fail,Ops}; -bif_to_test(is_function, [_]=Ops, Fail) -> {test,is_function,Fail,Ops}; -bif_to_test(is_function, [_,_]=Ops, Fail) -> {test,is_function2,Fail,Ops}; -bif_to_test(is_integer, [_]=Ops, Fail) -> {test,is_integer,Fail,Ops}; -bif_to_test(is_list, [_]=Ops, Fail) -> {test,is_list,Fail,Ops}; -bif_to_test(is_map, [_]=Ops, Fail) -> {test,is_map,Fail,Ops}; -bif_to_test(is_number, [_]=Ops, Fail) -> {test,is_number,Fail,Ops}; -bif_to_test(is_pid, [_]=Ops, Fail) -> {test,is_pid,Fail,Ops}; -bif_to_test(is_port, [_]=Ops, Fail) -> {test,is_port,Fail,Ops}; -bif_to_test(is_reference, [_]=Ops, Fail) -> {test,is_reference,Fail,Ops}; -bif_to_test(is_tuple, [_]=Ops, Fail) -> {test,is_tuple,Fail,Ops}; -bif_to_test('=<', [A,B], Fail) -> {test,is_ge,Fail,[B,A]}; -bif_to_test('>', [A,B], Fail) -> {test,is_lt,Fail,[B,A]}; -bif_to_test('<', [_,_]=Ops, Fail) -> {test,is_lt,Fail,Ops}; -bif_to_test('>=', [_,_]=Ops, Fail) -> {test,is_ge,Fail,Ops}; -bif_to_test('==', [C,A], Fail) when ?is_const(C) -> - {test,is_eq,Fail,[A,C]}; -bif_to_test('==', [_,_]=Ops, Fail) -> {test,is_eq,Fail,Ops}; -bif_to_test('/=', [C,A], Fail) when ?is_const(C) -> - {test,is_ne,Fail,[A,C]}; -bif_to_test('/=', [_,_]=Ops, Fail) -> {test,is_ne,Fail,Ops}; -bif_to_test('=:=', [C,A], Fail) when ?is_const(C) -> - {test,is_eq_exact,Fail,[A,C]}; -bif_to_test('=:=', [_,_]=Ops, Fail) -> {test,is_eq_exact,Fail,Ops}; -bif_to_test('=/=', [C,A], Fail) when ?is_const(C) -> - {test,is_ne_exact,Fail,[A,C]}; -bif_to_test('=/=', [_,_]=Ops, Fail) -> {test,is_ne_exact,Fail,Ops}. - - %% is_pure_test({test,Op,Fail,Ops}) -> true|false. %% Return 'true' if the test instruction does not modify any %% registers and/or bit syntax matching state. -- cgit v1.2.3 From 6ee1de2e3384b3f9a99f756867f18afd8166420c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 26 Sep 2018 12:38:08 +0200 Subject: Move peephole optimization from beam_block to beam_a Moving away this optimization makes beam_block do one thing and one thing only -- creating blocks. --- lib/compiler/src/beam_a.erl | 3 +++ lib/compiler/src/beam_block.erl | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl index 0abc845310..dd2537a699 100644 --- a/lib/compiler/src/beam_a.erl +++ b/lib/compiler/src/beam_a.erl @@ -59,6 +59,9 @@ rename_instrs([{test,is_eq_exact,_,[Dst,Src]}=Test, rename_instrs([{test,is_eq_exact,_,[Same,Same]}|Is]) -> %% Same literal or same register. Will always succeed. rename_instrs(Is); +rename_instrs([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_},{label,Fail}|Is]) -> + %% This instruction sequence does nothing. + rename_instrs(Is); rename_instrs([{apply_last,A,N}|Is]) -> [{apply,A},{deallocate,N},return|rename_instrs(Is)]; rename_instrs([{call_last,A,F,N}|Is]) -> diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index d28c0fd9e4..9d8d5b2b0c 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -49,9 +49,6 @@ function({function,Name,Arity,CLabel,Is0}) -> blockify(Is) -> blockify(Is, []). -blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) -> - %% Useless instruction sequence. - blockify(Is, Acc); blockify([I|Is0]=IsAll, Acc) -> case collect(I) of error -> blockify(Is0, [I|Acc]); -- cgit v1.2.3