From 0009fc378109f870472ded41d29c5d81febd5990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 3 Nov 2018 07:12:10 +0100 Subject: Add get_map_element to beam_ssa:no_side_effect/1 The `get_map_element` instruction has no side effects, and should be removed if its value is not used. --- lib/compiler/src/beam_ssa.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl index c5e23d2ae0..b491e340b7 100644 --- a/lib/compiler/src/beam_ssa.erl +++ b/lib/compiler/src/beam_ssa.erl @@ -194,6 +194,7 @@ no_side_effect(#b_set{op=Op}) -> extract -> true; get_hd -> true; get_tl -> true; + get_map_element -> true; get_tuple_element -> true; has_map_field -> true; is_nonempty_list -> true; -- cgit v1.2.3 From 667e0e1213f69344a57c2eb0427d36cd240e16a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 5 Nov 2018 07:35:02 +0100 Subject: beam_ssa_codegen: Improve optimization of allocate instructions There could be `allocate_zero` instructions where `allocate` would suffice or superfluous `init` instructions because all possible initializations of Y registers were not taken into account. While at it, also add some more comments. --- lib/compiler/src/beam_ssa_codegen.erl | 39 +++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl index 3c14062d0b..d3facc5911 100644 --- a/lib/compiler/src/beam_ssa_codegen.erl +++ b/lib/compiler/src/beam_ssa_codegen.erl @@ -747,7 +747,16 @@ need_live_anno(Op) -> end. %%% -%%% Add annotations for defined Y registers. +%%% Add the following annotations for Y registers: +%%% +%%% def_yregs An ordset with variables that refer to live Y registers. +%%% That is, Y registers that that have been killed +%%% are not included. This annotation is added to all +%%% instructions that require Y registers to be initialized. +%%% +%%% kill_yregs This annotation is added to call instructions. It is +%%% an ordset containing variables referring to Y registers +%%% that will no longer be used after the call instruction. %%% defined(Linear, #cg{regs=Regs}) -> @@ -863,13 +872,35 @@ opt_allocate(Linear, #cg{regs=Regs}) -> opt_allocate_1([{L,#cg_blk{is=[#cg_alloc{stack=Stk}=I0|Is]}=Blk0}|Bs]=Bs0, Regs) when is_integer(Stk) -> - Yregs = opt_alloc_def(Bs0, gb_sets:singleton(L), []), - I = I0#cg_alloc{def_yregs=Yregs}, - [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)]; + %% Collect the variables that are initialized by copy + %% instruction in this block. + case ordsets:from_list(opt_allocate_defs(Is, Regs)) of + Yregs when length(Yregs) =:= Stk -> + %% Those copy instructions are sufficient to fully + %% initialize the stack frame. + I = I0#cg_alloc{def_yregs=Yregs}, + [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)]; + Yregs0 -> + %% Determine a conservative approximation of the Y + %% registers that are guaranteed to be initialized by all + %% successors of this block, and to it add the variables + %% initialized by copy instructions in this block. + Yregs1 = opt_alloc_def(Bs0, gb_sets:singleton(L), []), + Yregs = ordsets:union(Yregs0, Yregs1), + I = I0#cg_alloc{def_yregs=Yregs}, + [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)] + end; opt_allocate_1([B|Bs], Regs) -> [B|opt_allocate_1(Bs, Regs)]; opt_allocate_1([], _) -> []. +opt_allocate_defs([#cg_set{op=copy,dst=Dst}|Is], Regs) -> + case is_yreg(Dst, Regs) of + true -> [Dst|opt_allocate_defs(Is, Regs)]; + false -> [] + end; +opt_allocate_defs(_, _Regs) -> []. + opt_alloc_def([{L,#cg_blk{is=Is,last=Last}}|Bs], Ws0, Def0) -> case gb_sets:is_member(L, Ws0) of false -> -- cgit v1.2.3 From b4595a670b159c802489d94d8537a48567967927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sun, 18 Nov 2018 06:21:18 +0100 Subject: beam_ssa_pre_codegen: Improve reuse of Y registers Enhance the copy_retval/1 optimization to allow Y registers to be reused in more circumstances. Reusing Y register can often reduce the size of the stack frame. --- lib/compiler/src/beam_ssa_pre_codegen.erl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index c60c6da9ea..32232e9b9f 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -1478,6 +1478,10 @@ copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs, Copy, Count, Acc) -> I = I0#b_set{args=copy_sub_args(Args0, Copy)}, {reverse(Acc, [I|acc_copy([], Copy)]),Count}; +copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0) + when Op =:= call; Op =:= make_fun -> + {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0), + {reverse(Acc, [I]),Count}; copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) -> {reverse(Acc, acc_copy(Is, Copy)),Count}; copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) -> -- cgit v1.2.3 From e79e5f194577d6c5b289415ec84338f70d2486db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 29 Oct 2018 16:31:00 +0100 Subject: Sort move instructions on the Y register Sort sequences of `move` instructions on the Y register. When moving from X registers to Y registers, having the instructions sorted on Y registers give the loader more opportunities to use `move_window{3,4,5}` instructions. For examples, the following five instructions: move_xy x(2) y(0) move_xy x(1) y(1) move_xy x(0) y(2) move_xy x(5) y(3) move_xy x(4) y(4) can be replaced with: move_window5_xxxxxy x(2) x(1) x(0) x(5) x(4) y(0) When the Y registers are not ordered so that `move_window5` can be used, the loader would typically combine the first three moves to a `move3_xyxyxy` instruction and the last two moves to a `move2_par_xyxy` instruction. When moving from Y registers to X registers, sorting on the Y registers could potentially be more cache-friendly. It could also be worthwhile investigating a new `move_window` instruction in the BEAM interpreter that could move values from contiguous Y registers to X registers. Note that `scripts/diffable` can generate diffable dissambly files for the loaded BEAM code: $ scripts/diffable --dis 0 $ scripts/diffable --dis 1 $ diff -u 0 1 --- lib/compiler/src/beam_block.erl | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 9d8d5b2b0c..707974b2c1 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -22,7 +22,7 @@ -module(beam_block). -export([module/2]). --import(lists, [reverse/1,splitwith/2]). +-import(lists, [keysort/2,reverse/1,splitwith/2]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. @@ -53,7 +53,8 @@ blockify([I|Is0]=IsAll, Acc) -> case collect(I) of error -> blockify(Is0, [I|Acc]); Instr when is_tuple(Instr) -> - {Block,Is} = collect_block(IsAll), + {Block0,Is} = collect_block(IsAll), + Block = sort_moves(Block0), blockify(Is, [{block,Block}|Acc]) end; blockify([], Acc) -> reverse(Acc). @@ -117,3 +118,36 @@ embed_lines([{block,B1},{line,_}=Line|T], Acc) -> embed_lines([I|Is], Acc) -> embed_lines(Is, [I|Acc]); embed_lines([], Acc) -> Acc. + +%% sort_moves([Instruction]) -> [Instruction]. +%% Sort move instructions on the Y register to give the loader +%% more opportunities for combining instructions. + +sort_moves([{set,[{x,_}],[{y,_}],move}=I|Is0]) -> + {Moves,Is} = sort_moves_1(Is0, x, y, [I]), + Moves ++ sort_moves(Is); +sort_moves([{set,[{y,_}],[{x,_}],move}=I|Is0]) -> + {Moves,Is} = sort_moves_1(Is0, y, x, [I]), + Moves ++ sort_moves(Is); +sort_moves([I|Is]) -> + [I|sort_moves(Is)]; +sort_moves([]) -> []. + +sort_moves_1([{set,[{x,0}],[_],move}=I|Is], _DTag, _STag, Acc) -> + %% The loader sometimes combines a move to x0 with the + %% instruction that follows, producing, for example, a move_call + %% instruction. Therefore, we don't want include this move + %% instruction in the sorting. + {sort_on_yreg(Acc)++[I],Is}; +sort_moves_1([{set,[{DTag,_}],[{STag,_}],move}=I|Is], DTag, STag, Acc) -> + sort_moves_1(Is, DTag, STag, [I|Acc]); +sort_moves_1(Is, _DTag, _STag, Acc) -> + {sort_on_yreg(Acc),Is}. + +sort_on_yreg([{set,[Dst],[Src],move}|_]=Moves) -> + case {Dst,Src} of + {{y,_},{x,_}} -> + keysort(2, Moves); + {{x,_},{y,_}} -> + keysort(3, Moves) + end. -- cgit v1.2.3