From ce67e91a851273c8aee58f7dad270c39f18fe0de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 10 Jan 2018 13:09:33 +0100 Subject: beam_block: Reorder element/2 calls in guards In a guard, reorder two consecutive calls to the element/2 BIF that access the same tuple and have the same failure label so that highest index is fetched first. That will allow the second element/2 to be replace with the slightly cheaper get_tuple_element/3 instruction. --- lib/compiler/src/beam_block.erl | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 570fc05ae5..7b3a95e03e 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -240,6 +240,10 @@ opt([{set,_,_,{line,_}}=Line1, {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,0}}}=I2|Is]) when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg -> opt([Line2,I2,Line1,I1|Is]); +opt([{set,[D1],[{integer,Idx1},Reg],{bif,element,{f,L}}}=I1, + {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,L}}}=I2|Is]) + when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg -> + opt([I2,I1|Is]); opt([{set,Ds0,Ss,Op}|Is0]) -> {Ds,Is} = opt_moves(Ds0, Is0), [{set,Ds,Ss,Op}|opt(Is)]; -- cgit v1.2.3 From 467ef356dbae0b53b824fcb71b3fc6899ced29a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 9 Jan 2018 16:42:21 +0100 Subject: beam_utils: Correct handling of liveness for select_val Since the select_val instruction never transfer directly to the next instruction, the incoming live registers should be ignored. This bug have not caused any problems yet, but it will in the future if we are to run the liveness optimizations again after the optimizations in beam_dead and beam_jump. --- lib/compiler/src/beam_utils.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index a2795e625f..bb0f78eafc 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -902,7 +902,8 @@ live_opt([{test,_,Fail,Live,Ss,_}=I|Is], _, D, Acc) -> Regs1 = x_live(Ss, Regs0), Regs = live_join_label(Fail, D, Regs1), live_opt(Is, Regs, D, [I|Acc]); -live_opt([{select,_,Src,Fail,List}=I|Is], Regs0, D, Acc) -> +live_opt([{select,_,Src,Fail,List}=I|Is], _, D, Acc) -> + Regs0 = 0, Regs1 = x_live([Src], Regs0), Regs = live_join_labels([Fail|List], D, Regs1), live_opt(Is, Regs, D, [I|Acc]); -- cgit v1.2.3 From 41aa951ca392e445d1ccae688f5a00662f6ca537 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 10 Jan 2018 05:00:14 +0100 Subject: Prepare beam_utils to run again after beam_split beam_utils:live_opt/1 is currently only run early (from beam_block). Prepare it to be run after beam_split when instructions with failure labels have been taken out of blocks. While we are it, also improve check_liveness/3. That will improve the optimizations in beam_record (replacing tuple matching instructions with an is_tagged_tuple instruction). --- lib/compiler/src/beam_utils.erl | 53 +++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 10 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index bb0f78eafc..2080a79986 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -397,6 +397,8 @@ check_liveness(R, [{jump,{f,F}}|_], St) -> check_liveness_at(R, F, St); check_liveness(R, [{case_end,Used}|_], St) -> check_liveness_ret(R, Used, St); +check_liveness(R, [{try_case_end,Used}|_], St) -> + check_liveness_ret(R, Used, St); check_liveness(R, [{badmatch,Used}|_], St) -> check_liveness_ret(R, Used, St); check_liveness(_, [if_end|_], St) -> @@ -522,16 +524,12 @@ check_liveness(R, [{make_fun2,_,_,_,NumFree}|Is], St) -> {x,_} -> {killed,St}; {y,_} -> not_used(check_liveness(R, Is, St)) end; -check_liveness({x,_}=R, [{'catch',_,_}|Is], St) -> - %% All x registers will be killed if an exception occurs. - %% Therefore we only need to check the liveness for the - %% instructions following the catch instruction. - check_liveness(R, Is, St); -check_liveness({x,_}=R, [{'try',_,_}|Is], St) -> - %% All x registers will be killed if an exception occurs. - %% Therefore we only need to check the liveness for the - %% instructions inside the 'try' block. - check_liveness(R, Is, St); +check_liveness(R, [{'catch'=Op,Y,Fail}|Is], St) -> + Set = {set,[Y],[],{try_catch,Op,Fail}}, + check_liveness(R, [{block,[Set]}|Is], St); +check_liveness(R, [{'try'=Op,Y,Fail}|Is], St) -> + Set = {set,[Y],[],{try_catch,Op,Fail}}, + check_liveness(R, [{block,[Set]}|Is], St); check_liveness(R, [{try_end,Y}|Is], St) -> case R of Y -> @@ -592,6 +590,12 @@ check_liveness(R, [{get_map_elements,{f,Fail},S,{list,L}}|Is], St0) -> check_liveness(R, [{put_map,F,Op,S,D,Live,{list,Puts}}|Is], St) -> Set = {set,[D],[S|Puts],{alloc,Live,{put_map,Op,F}}}, check_liveness(R, [{block,[Set]}||Is], St); +check_liveness(R, [{put_tuple,Ar,D}|Is], St) -> + Set = {set,[D],[],{put_tuple,Ar}}, + check_liveness(R, [{block,[Set]}||Is], St); +check_liveness(R, [{put_list,S1,S2,D}|Is], St) -> + Set = {set,[D],[S1,S2],put_list}, + check_liveness(R, [{block,[Set]}||Is], St); check_liveness(R, [{test_heap,N,Live}|Is], St) -> I = {block,[{set,[],[],{alloc,Live,{nozero,nostack,N,[]}}}]}, check_liveness(R, [I|Is], St); @@ -605,6 +609,12 @@ check_liveness(R, [remove_message|Is], St) -> check_liveness(R, Is, St); check_liveness({x,X}, [build_stacktrace|_], St) when X > 0 -> {killed,St}; +check_liveness(R, [{recv_mark,_}|Is], St) -> + check_liveness(R, Is, St); +check_liveness(R, [{recv_set,_}|Is], St) -> + check_liveness(R, Is, St); +check_liveness(R, [{'%',_}|Is], St) -> + check_liveness(R, Is, St); check_liveness(_R, Is, St) when is_list(Is) -> %% Not implemented. Conservatively assume that the register is used. {used,St}. @@ -926,6 +936,25 @@ live_opt([{get_map_elements,Fail,Src,{list,List}}=I|Is], Regs0, D, Acc) -> Regs1 = x_live([Src|Ss], x_dead(Ds, Regs0)), Regs = live_join_label(Fail, D, Regs1), live_opt(Is, Regs, D, [I|Acc]); +live_opt([{gc_bif,N,F,R,As,Dst}=I|Is], Regs0, D, Acc) -> + Bl = [{set,[Dst],As,{alloc,R,{gc_bif,N,F}}}], + {_,Regs} = live_opt_block(Bl, Regs0, D, []), + live_opt(Is, Regs, D, [I|Acc]); +live_opt([{bif,N,F,As,Dst}=I|Is], Regs0, D, Acc) -> + Bl = [{set,[Dst],As,{bif,N,F}}], + {_,Regs} = live_opt_block(Bl, Regs0, D, []), + live_opt(Is, Regs, D, [I|Acc]); +live_opt([{get_tuple_element,Src,Idx,Dst}=I|Is], Regs0, D, Acc) -> + Bl = [{set,[Dst],[Src],{get_tuple_element,Idx}}], + {_,Regs} = live_opt_block(Bl, Regs0, D, []), + live_opt(Is, Regs, D, [I|Acc]); +live_opt([{move,Src,Dst}=I|Is], Regs0, D, Acc) -> + Regs = x_live([Src], x_dead([Dst], Regs0)), + live_opt(Is, Regs, D, [I|Acc]); +live_opt([{put_map,F,Op,S,Dst,R,{list,Puts}}=I|Is], Regs0, D, Acc) -> + Bl = [{set,[Dst],[S|Puts],{alloc,R,{put_map,Op,F}}}], + {_,Regs} = live_opt_block(Bl, Regs0, D, []), + live_opt(Is, Regs, D, [I|Acc]); %% Transparent instructions - they neither use nor modify x registers. live_opt([{deallocate,_}=I|Is], Regs, D, Acc) -> @@ -942,6 +971,10 @@ live_opt([{wait_timeout,_,{Tag,_}}=I|Is], Regs, D, Acc) when Tag =/= x -> live_opt(Is, Regs, D, [I|Acc]); live_opt([{line,_}=I|Is], Regs, D, Acc) -> live_opt(Is, Regs, D, [I|Acc]); +live_opt([{'catch',_,_}=I|Is], Regs, D, Acc) -> + live_opt(Is, Regs, D, [I|Acc]); +live_opt([{'try',_,_}=I|Is], Regs, D, Acc) -> + live_opt(Is, Regs, D, [I|Acc]); %% The following instructions can occur if the "compilation" has been %% started from a .S file using the 'from_asm' option. -- cgit v1.2.3 From 114f6de068f1cf5b0cf0cf1aacf2ff9a7f1f3650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 10 Jan 2018 07:38:12 +0100 Subject: beam_bsm: Insert introduced 'move' instructions into block If possible, when adding move/2 instructions, try to insert them into a block. That could potentially allow them to be optimized. --- lib/compiler/src/beam_bsm.erl | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_bsm.erl b/lib/compiler/src/beam_bsm.erl index 9ea5a3eb92..9f3b9d788f 100644 --- a/lib/compiler/src/beam_bsm.erl +++ b/lib/compiler/src/beam_bsm.erl @@ -124,20 +124,21 @@ btb_opt_1([{test,bs_get_binary2,F,_,[Reg,{atom,all},U,Fs],Reg}=I0|Is], D, Acc0) end, btb_opt_1(Is, D, Acc) end; -btb_opt_1([{test,bs_get_binary2,F,_,[Ctx,{atom,all},U,Fs],Dst}=I0|Is], D, Acc0) -> - case btb_reaches_match(Is, [Ctx,Dst], D) of +btb_opt_1([{test,bs_get_binary2,F,_,[Ctx,{atom,all},U,Fs],Dst}=I0|Is0], D, Acc0) -> + case btb_reaches_match(Is0, [Ctx,Dst], D) of {error,Reason} -> Comment = btb_comment_no_opt(Reason, Fs), - btb_opt_1(Is, D, [Comment,I0|Acc0]); + btb_opt_1(Is0, D, [Comment,I0|Acc0]); {ok,MustSave} when U =:= 1 -> Comment = btb_comment_opt(Fs), - Acc1 = btb_gen_save(MustSave, Ctx, [Comment|Acc0]), - Acc = [{move,Ctx,Dst}|Acc1], + Acc = btb_gen_save(MustSave, Ctx, [Comment|Acc0]), + Is = prepend_move(Ctx, Dst, Is0), btb_opt_1(Is, D, Acc); {ok,MustSave} -> Comment = btb_comment_opt(Fs), Acc1 = btb_gen_save(MustSave, Ctx, [Comment|Acc0]), - Acc = [{move,Ctx,Dst},{test,bs_test_unit,F,[Ctx,U]}|Acc1], + Acc = [{test,bs_test_unit,F,[Ctx,U]}|Acc1], + Is = prepend_move(Ctx, Dst, Is0), btb_opt_1(Is, D, Acc) end; btb_opt_1([I|Is], D, Acc) -> @@ -150,6 +151,12 @@ btb_gen_save(true, Reg, Acc) -> [{bs_save2,Reg,{atom,start}}|Acc]; btb_gen_save(false, _, Acc) -> Acc. +prepend_move(Ctx, Dst, [{block,Bl0}|Is]) -> + Bl = [{set,[Dst],[Ctx],move}|Bl0], + [{block,Bl}|Is]; +prepend_move(Ctx, Dst, Is) -> + [{move,Ctx,Dst}|Is]. + %% btb_reaches_match([Instruction], [Register], D) -> %% {ok,MustSave}|{error,Reason} %% -- cgit v1.2.3 From 1a029efd1ad47f5736faa7f7be6780b649a8b257 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 10 Jan 2018 07:02:21 +0100 Subject: Run beam_block again after other optimizations have been run Running beam_block again after the other optimizations have run will give it more opportunities for optimizations. In particular, more allocate_zero/2 instructions can be turned into allocate/2 instructions, and more get_tuple_element/3 instructions can store the retrieved value into the correct register at once. Out of a sample of about 700 modules in OTP, 64 modules were improved by this commit. --- lib/compiler/src/beam_block.erl | 16 +++++++++++----- lib/compiler/src/compile.erl | 6 ++++++ 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 7b3a95e03e..488485c17a 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -28,15 +28,21 @@ -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. -module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> - Fs = [function(F) || F <- Fs0], +module({Mod,Exp,Attr,Fs0,Lc}, Opts) -> + Blockify = not member(no_blockify, Opts), + Fs = [function(F, Blockify) || F <- Fs0], {ok,{Mod,Exp,Attr,Fs,Lc}}. -function({function,Name,Arity,CLabel,Is0}) -> +function({function,Name,Arity,CLabel,Is0}, Blockify) -> try %% Collect basic blocks and optimize them. - Is1 = blockify(Is0), - Is2 = embed_lines(Is1), + Is2 = case Blockify of + true -> + Is1 = blockify(Is0), + embed_lines(Is1); + false -> + Is0 + end, Is3 = beam_utils:anno_defs(Is2), Is4 = move_allocates(Is3), Is5 = beam_utils:live_opt(Is4), diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index 770aa2c6c1..1409c358c2 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -775,6 +775,8 @@ asm_passes() -> {iff,drecv,{listing,"recv"}}, {unless,no_record_opt,{pass,beam_record}}, {iff,drecord,{listing,"record"}}, + {unless,no_blk2,?pass(block2)}, + {iff,dblk2,{listing,"block2"}}, {unless,no_stack_trimming,{pass,beam_trim}}, {iff,dtrim,{listing,"trim"}}, {pass,beam_flatten}]}, @@ -1350,6 +1352,10 @@ v3_kernel(Code0, #compile{options=Opts,warnings=Ws0}=St) -> {ok,Code,St} end. +block2(Code0, #compile{options=Opts}=St) -> + {ok,Code} = beam_block:module(Code0, [no_blockify|Opts]), + {ok,Code,St}. + test_old_inliner(#compile{options=Opts}) -> %% The point of this test is to avoid loading the old inliner %% if we know that it will not be used. -- cgit v1.2.3