From b4030b60b58a681b2dea5453fbc36f7f4cc41bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 7 Jul 2015 15:21:30 +0200 Subject: Teach the compiler the 'da' and 'dz' options Add the 'da' option to create a list after the beam_a pass. Seeing how the code looks after beam_a, but before the blocks have been established, is sometimes useful. For symmetry, add the 'dz' option, even though it is just a synonym for 'S'. --- lib/compiler/src/compile.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index e0a29fe9b1..cf79fdc9f9 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -671,6 +671,7 @@ asm_passes() -> %% Assembly level optimisations. [{delay, [{pass,beam_a}, + {iff,da,{listing,"a"}}, {unless,no_postopt, [{pass,beam_block}, {iff,dblk,{listing,"block"}}, @@ -703,6 +704,7 @@ asm_passes() -> {iff,no_postopt,[{pass,beam_clean}]}, {pass,beam_z}, + {iff,dz,{listing,"z"}}, {iff,dopt,{listing,"optimize"}}, {iff,'S',{listing,"S"}}, {iff,'to_asm',{done,"S"}}]}, -- cgit v1.2.3 From f3cc9d1b16d86124a70f5dd0609113ec6e3b4dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 5 Aug 2015 19:35:45 +0200 Subject: beam_block: Eliminate redundant wasteful call to opt/1 opt_alloc/1 makes a redundant call to opt/1. It is redundant because the opt/1 function has already been applied to the instruction sequence prior to calling opt_alloc/1. --- lib/compiler/src/beam_block.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 2def3de7f3..4861a2d7ec 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -346,7 +346,7 @@ is_transparent(_, _) -> false. %% Optimises all allocate instructions. opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) -> - [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|opt(Is)]; + [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|Is]; opt_alloc([I|Is]) -> [I|opt_alloc(Is)]; opt_alloc([]) -> []. -- cgit v1.2.3 From 7a47b20c3accf323bdbf7bcf9c86fbf8b2c18e20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 22 Jul 2015 09:45:04 +0200 Subject: beam_block: Clean up optimization of move optimizations The 'move' optimization was relatively clean until GC BIFs were introduced. Instead of re-thinking the implementation, the existing code was fixed and patched. The current code unsuccessfully attempts to eliminate 'move' instructions across GC BIF and allocation instructions. We can simplify the code if we give up as soon as we encounter any instruction that allocates. --- lib/compiler/src/beam_block.erl | 83 ++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 4861a2d7ec..58e8f77a2c 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -287,60 +287,51 @@ opt_moves(Ds, Is) -> %% If there is a {move,Dest,FinalDest} instruction %% in the instruction stream, remove the move instruction %% and let FinalDest be the destination. -%% -%% For this optimization to be safe, we must be sure that -%% Dest will not be referenced in any other by other instructions -%% in the rest of the instruction stream. Not even the indirect -%% reference by an instruction that may allocate (such as -%% test_heap/2 or a GC Bif) is allowed. opt_move(Dest, Is) -> - opt_move_1(Dest, Is, ?MAXREG, []). - -opt_move_1(R, [{set,_,_,{alloc,Live,_}}|_]=Is, SafeRegs, Acc) when Live < SafeRegs -> - %% Downgrade number of safe regs and rescan the instruction, as it most probably - %% is a gc_bif instruction. - opt_move_1(R, Is, Live, Acc); -opt_move_1(R, [{set,[{x,X}=D],[R],move}|Is], SafeRegs, Acc) -> - case X < SafeRegs andalso beam_utils:is_killed_block(R, Is) of - true -> opt_move_2(D, Acc, Is); - false -> not_possible - end; -opt_move_1(R, [{set,[D],[R],move}|Is], _SafeRegs, Acc) -> + opt_move_1(Dest, Is, []). + +opt_move_1(R, [{set,[D],[R],move}|Is], Acc) -> + %% Check whether this instruction can be safely eliminated. + %% First check that the source (R) is not used by the + %% instructions that follow. case beam_utils:is_killed_block(R, Is) of - true -> opt_move_2(D, Acc, Is); + true -> opt_move_rev(D, Acc, Is); false -> not_possible end; -opt_move_1(R, [I|Is], SafeRegs, Acc) -> - case is_transparent(R, I) of - false -> not_possible; - true -> opt_move_1(R, Is, SafeRegs, [I|Acc]) - end. +opt_move_1({x,_}, [{set,_,_,{alloc,_,_}}|_], _) -> + %% The optimization is not possible. If the X register is not + %% killed by allocation, the optimization would not be safe. + %% If the X register is killed, it means that there cannot + %% follow a 'move' instruction with this X register as the + %% source. + not_possible; +opt_move_1(R, [{set,_,_,_}=I|Is], Acc) -> + %% If the source register is either killed or used by this + %% instruction, the optimimization is not possible. + case is_killed_or_used(R, I) of + true -> not_possible; + false -> opt_move_1(R, Is, [I|Acc]) + end; +opt_move_1(_, _, _) -> + not_possible. -%% Reverse the instructions, while checking that there are no instructions that -%% would interfere with using the new destination register chosen. +%% Reverse the instructions, while checking that there are no +%% instructions that would interfere with using the new destination +%% register (D). -opt_move_2(D, [I|Is], Acc) -> - case is_transparent(D, I) of - false -> not_possible; - true -> opt_move_2(D, Is, [I|Acc]) - end; -opt_move_2(D, [], Acc) -> {D,Acc}. - -%% is_transparent(Register, Instruction) -> true | false -%% Returns true if Instruction does not in any way references Register -%% (even indirectly by an allocation instruction). -%% Returns false if Instruction does reference Register, or we are -%% not sure. - -is_transparent({x,X}, {set,_,_,{alloc,Live,_}}) when X < Live -> - false; -is_transparent(R, {set,Ds,Ss,_Op}) -> - case member(R, Ds) of - true -> false; - false -> not member(R, Ss) +opt_move_rev(D, [I|Is], Acc) -> + case is_killed_or_used(D, I) of + true -> not_possible; + false -> opt_move_rev(D, Is, [I|Acc]) end; -is_transparent(_, _) -> false. +opt_move_rev(D, [], Acc) -> {D,Acc}. + +%% is_killed_or_used(Register, {set,_,_,_}) -> bool() +%% Test whether the register is used by the instruction. + +is_killed_or_used(R, {set,Ss,Ds,_}) -> + member(R, Ds) orelse member(R, Ss). %% opt_alloc(Instructions) -> Instructions' %% Optimises all allocate instructions. -- cgit v1.2.3 From 02d6135813dc133e018c161d803a642582aac36f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 22 Jul 2015 10:20:59 +0200 Subject: beam_block: Improve the move optimizations Here is an example of a move instruction that could not be optimized away because the {x,2} register was not killed: get_tuple_element Reg Pos {x,2} . . . move {x,2} {y,0} put_list {x,2} nil Any We can do the optimization if we replace all occurrences of the {x,2} register as a source with {y,0}: get_tuple_element Reg Pos {y,0} . . . put_list {y,0} nil Dst --- lib/compiler/src/beam_block.erl | 53 +++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 58e8f77a2c..af1bf8fd08 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -291,13 +291,12 @@ opt_moves(Ds, Is) -> opt_move(Dest, Is) -> opt_move_1(Dest, Is, []). -opt_move_1(R, [{set,[D],[R],move}|Is], Acc) -> - %% Check whether this instruction can be safely eliminated. - %% First check that the source (R) is not used by the - %% instructions that follow. - case beam_utils:is_killed_block(R, Is) of - true -> opt_move_rev(D, Acc, Is); - false -> not_possible +opt_move_1(R, [{set,[D],[R],move}|Is0], Acc) -> + %% Provided that the source register is killed by instructions + %% that follow, the optimization is safe. + case eliminate_use_of_from_reg(Is0, R, D, []) of + {yes,Is} -> opt_move_rev(D, Acc, Is); + no -> not_possible end; opt_move_1({x,_}, [{set,_,_,{alloc,_,_}}|_], _) -> %% The optimization is not possible. If the X register is not @@ -333,6 +332,46 @@ opt_move_rev(D, [], Acc) -> {D,Acc}. is_killed_or_used(R, {set,Ss,Ds,_}) -> member(R, Ds) orelse member(R, Ss). +%% eliminate_use_of_from_reg([Instruction], FromRegister, ToRegister, Acc) -> +%% {yes,Is} | no +%% Eliminate any use of FromRegister in the instruction sequence +%% by replacing uses of FromRegister with ToRegister. If FromRegister +%% is referenced by an allocation instruction, return 'no' to indicate +%% that FromRegister is still used and that the optimization is not +%% possible. + +eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) -> + if + X < Live -> + no; + true -> + {yes,reverse(Acc, Is0)} + end; +eliminate_use_of_from_reg([{set,Ds,Ss0,Op}=I0|Is], From, To, Acc) -> + I = case member(From, Ss0) of + true -> + Ss = [case S of + From -> To; + _ -> S + end || S <- Ss0], + {set,Ds,Ss,Op}; + false -> + I0 + end, + case member(From, Ds) of + true -> + {yes,reverse(Acc, [I|Is])}; + false -> + eliminate_use_of_from_reg(Is, From, To, [I|Acc]) + end; +eliminate_use_of_from_reg([I]=Is, From, _To, Acc) -> + case beam_utils:is_killed_block(From, [I]) of + true -> + {yes,reverse(Acc, Is)}; + false -> + no + end. + %% opt_alloc(Instructions) -> Instructions' %% Optimises all allocate instructions. -- cgit v1.2.3 From 5f431276f1044c673c2e434e003e2f1ffddab341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 23 Jul 2015 17:43:15 +0200 Subject: Optimize get_tuple_element instructions by moving them forward --- lib/compiler/src/beam_block.erl | 53 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index af1bf8fd08..49e66b379b 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -183,7 +183,9 @@ opt_blocks([I|Is]) -> opt_blocks([]) -> []. opt_block(Is0) -> - Is = find_fixpoint(fun opt/1, Is0), + Is = find_fixpoint(fun(Is) -> + opt_tuple_element(opt(Is)) + end, Is0), opt_alloc(Is). find_fixpoint(OptFun, Is0) -> @@ -315,6 +317,55 @@ opt_move_1(R, [{set,_,_,_}=I|Is], Acc) -> opt_move_1(_, _, _) -> not_possible. +%% opt_tuple_element([Instruction]) -> [Instruction] +%% If possible, move get_tuple_element instructions forward +%% in the instruction stream to a move instruction, eliminating +%% the move instruction. Example: +%% +%% get_tuple_element Tuple Pos Dst1 +%% ... +%% move Dst1 Dst2 +%% +%% This code may be possible to rewrite to: +%% +%% %%(Moved get_tuple_element instruction) +%% ... +%% get_tuple_element Tuple Pos Dst2 +%% + +opt_tuple_element([{set,[D],[S],{get_tuple_element,_}}=I|Is0]) -> + case opt_tuple_element_1(Is0, I, {S,D}, []) of + no -> + [I|opt_tuple_element(Is0)]; + {yes,Is} -> + opt_tuple_element(Is) + end; +opt_tuple_element([I|Is]) -> + [I|opt_tuple_element(Is)]; +opt_tuple_element([]) -> []. + +opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) -> + no; +opt_tuple_element_1([{set,_,_,{'catch',_}}|_], _, _, _) -> + no; +opt_tuple_element_1([{set,[D],[S],move}|Is0], I0, {_,S}, Acc) -> + case eliminate_use_of_from_reg(Is0, S, D, []) of + no -> + no; + {yes,Is} -> + {set,[S],Ss,Op} = I0, + I = {set,[D],Ss,Op}, + {yes,reverse(Acc, [I|Is])} + end; +opt_tuple_element_1([{set,Ds,Ss,_}=I|Is], MovedI, {S,D}=Regs, Acc) -> + case member(S, Ds) orelse member(D, Ss) of + true -> + no; + false -> + opt_tuple_element_1(Is, MovedI, Regs, [I|Acc]) + end; +opt_tuple_element_1(_, _, _, _) -> no. + %% Reverse the instructions, while checking that there are no %% instructions that would interfere with using the new destination %% register (D). -- cgit v1.2.3 From c288ab87fd6cafe22ce46be551baa2e815b495b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 7 Jul 2015 10:45:38 +0200 Subject: Delay get_tuple_element instructions until they are needed When matching tuples, the pattern matching compiler would generate code that would fetch all elements of the tuple that will ultimately be used, *before* testing that (for example) the first element is the correct record tag. For example: is_tuple Fail {x,0} test_arity Fail {x,0} 3 get_tuple_element {x,0} 0 {x,1} get_tuple_element {x,0} 1 {x,2} get_tuple_element {x,0} 2 {x,3} is_eq_exact Fail {x,1} some_tag If {x,2} and {x,3} are not used at label Fail, we can re-arrange the code like this: is_tuple Fail {x,0} test_arity Fail {x,0} 3 get_tuple_element {x,0} 0 {x,1} is_eq_exact Fail {x,1} some_tag get_tuple_element {x,0} 1 {x,2} get_tuple_element {x,0} 2 {x,3} Doing that may be beneficial in two ways. If the branch is taken, we have eliminated the execution of two unnecessary instructions. Even if the branch is never or rarely taken, there is the possibility for more optimizations following the is_eq_exact instructions. For example, imagine that the code looks like this: get_tuple_element {x,0} 1 {x,2} get_tuple_element {x,0} 2 {x,3} move {x,2} {y,0} move {x,3} {y,1} Assuming that {x,2} and {x,3} have no further uses in the code that follows, that can be rewritten to: get_tuple_element {x,0} 1 {y,0} get_tuple_element {x,0} 2 {y,1} When should we perform this optimization? At the very latest, it must be done before opt_blocks/1 in beam_block which does the elimination of unnecessary moves. Actually, we want do the optimization before the blocks have been established, since moving instructions out of one block into another is cumbersome. Therefore, we will do the optimization in a new pass that is run before beam_block. A new pass will make debugging easier, and beam_block already has a fair number of sub passes. --- lib/compiler/src/Makefile | 1 + lib/compiler/src/beam_reorder.erl | 113 ++++++++++++++++++++++++++++++++++++++ lib/compiler/src/beam_utils.erl | 9 +++ lib/compiler/src/compile.erl | 4 +- lib/compiler/src/compiler.app.src | 1 + lib/compiler/test/misc_SUITE.erl | 8 +++ 6 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 lib/compiler/src/beam_reorder.erl diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile index 299b2892fc..ae4007c61c 100644 --- a/lib/compiler/src/Makefile +++ b/lib/compiler/src/Makefile @@ -62,6 +62,7 @@ MODULES = \ beam_opcodes \ beam_peep \ beam_receive \ + beam_reorder \ beam_split \ beam_trim \ beam_type \ diff --git a/lib/compiler/src/beam_reorder.erl b/lib/compiler/src/beam_reorder.erl new file mode 100644 index 0000000000..3230e33dbd --- /dev/null +++ b/lib/compiler/src/beam_reorder.erl @@ -0,0 +1,113 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1999-2013. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(beam_reorder). + +-export([module/2]). +-import(lists, [member/2,reverse/1]). + +module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> + Fs = [function(F) || F <- Fs0], + {ok,{Mod,Exp,Attr,Fs,Lc}}. + +function({function,Name,Arity,CLabel,Is0}) -> + try + Is = reorder(Is0), + {function,Name,Arity,CLabel,Is} + catch + Class:Error -> + Stack = erlang:get_stacktrace(), + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +%% reorder(Instructions0) -> Instructions +%% Reorder instructions before the beam_block pass, because reordering +%% will be more cumbersome when the blocks are in place. +%% +%% Execution of get_tuple_element instructions can be delayed until +%% they are actually needed. Consider the sequence: +%% +%% get_tuple_element Tuple Pos Dst +%% test Test Fail Operands +%% +%% If Dst is killed at label Fail (and not referenced in Operands), +%% we can can swap the instructions: +%% +%% test Test Fail Operands +%% get_tuple_element Tuple Pos Dst +%% +%% That can be beneficial in two ways: Firstly, if the branch is taken +%% we have avoided execution of the get_tuple_element instruction. +%% Secondly, even if the branch is not taken, subsequent optimization +%% (opt_blocks/1) may be able to change Dst to the final destination +%% register and eliminate a 'move' instruction. + +reorder(Is) -> + D = beam_utils:index_labels(Is), + reorder_1(Is, D, []). + +reorder_1([{label,L}=I|_], D, Acc) -> + Is = beam_utils:code_at(L, D), + reorder_1(Is, D, [I|Acc]); +reorder_1([{test,is_nonempty_list,_,_}=I|Is], D, Acc) -> + %% The run-time system may combine the is_nonempty_list test with + %% the following get_list instruction. + reorder_1(Is, D, [I|Acc]); +reorder_1([{test,_,_,_}=I, + {select,_,_,_,_}=S|Is], D, Acc) -> + %% There is nothing to gain by inserting a get_tuple_element + %% instruction between the test instruction and the select + %% instruction. + reorder_1(Is, D, [S,I|Acc]); +reorder_1([{test,_,{f,L},Ss}=I|Is0], D0, + [{get_tuple_element,_,_,El}=G|Acc0]=Acc) -> + case member(El, Ss) of + true -> + reorder_1(Is0, D0, [I|Acc]); + false -> + case beam_utils:is_killed_at(El, L, D0) of + true -> + Is = [I,G|Is0], + reorder_1(Is, D0, Acc0); + false -> + case beam_utils:is_killed(El, Is0, D0) of + true -> + Code0 = beam_utils:code_at(L, D0), + Code = [G|Code0], + D = beam_utils:index_label(L, Code, D0), + Is = [I|Is0], + reorder_1(Is, D, Acc0); + false -> + reorder_1(Is0, D0, [I|Acc]) + end + end + end; +reorder_1([{allocate_zero,N,Live}|Is], D, + [{get_tuple_element,_,_,{x,X}}=G|Acc]) + when X+1 =:= Live -> + %% Move allocation instruction upwards past get_tuple_element + %% instructions to give more opportunities for moving + %% get_tuple_element instructions. + I = {allocate_zero,N,X}, + reorder_1([I,G|Is], D, Acc); +reorder_1([I|Is], D, Acc) -> + reorder_1(Is, D, [I|Acc]); +reorder_1([], _, Acc) -> reverse(Acc). diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index fbcd5de1bb..68d6105cfa 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -484,6 +484,15 @@ check_liveness(R, [{get_map_elements,{f,Fail},S,{list,L}}|Is], St0) -> Other end end; +check_liveness(R, [{test_heap,N,Live}|Is], St) -> + I = {block,[{set,[],[],{alloc,Live,{nozero,nostack,N,[]}}}]}, + check_liveness(R, [I|Is], St); +check_liveness(R, [{allocate_zero,N,Live}|Is], St) -> + I = {block,[{set,[],[],{alloc,Live,{zero,N,0,[]}}}]}, + check_liveness(R, [I|Is], St); +check_liveness(R, [{get_list,S,D1,D2}|Is], St) -> + I = {block,[{set,[D1,D2],[S],get_list}]}, + check_liveness(R, [I|Is], St); check_liveness(_R, Is, St) when is_list(Is) -> %% case Is of %% [I|_] -> diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index cf79fdc9f9..605f5b8fd5 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -673,7 +673,9 @@ asm_passes() -> [{pass,beam_a}, {iff,da,{listing,"a"}}, {unless,no_postopt, - [{pass,beam_block}, + [{unless,no_reorder,{pass,beam_reorder}}, + {iff,dre,{listing,"reorder"}}, + {pass,beam_block}, {iff,dblk,{listing,"block"}}, {unless,no_except,{pass,beam_except}}, {iff,dexcept,{listing,"except"}}, diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src index afb85f4710..62ea9cee80 100644 --- a/lib/compiler/src/compiler.app.src +++ b/lib/compiler/src/compiler.app.src @@ -37,6 +37,7 @@ beam_opcodes, beam_peep, beam_receive, + beam_reorder, beam_split, beam_trim, beam_type, diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl index 8606935504..3582e055c8 100644 --- a/lib/compiler/test/misc_SUITE.erl +++ b/lib/compiler/test/misc_SUITE.erl @@ -192,6 +192,14 @@ silly_coverage(Config) when is_list(Config) -> {label,2}|non_proper_list]}],99}, expect_error(fun() -> beam_a:module(BeamAInput, []) end), + %% beam_reorder + BlockInput = {?MODULE,[{foo,0}],[], + [{function,foo,0,2, + [{label,1}, + {func_info,{atom,?MODULE},{atom,foo},0}, + {label,2}|non_proper_list]}],99}, + expect_error(fun() -> beam_reorder:module(BlockInput, []) end), + %% beam_block BlockInput = {?MODULE,[{foo,0}],[], [{function,foo,0,2, -- cgit v1.2.3 From 3640e5c89bda0ca45b3320e8a00efb48b9d9f531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 5 Aug 2015 19:12:15 +0200 Subject: Reorder instructions across try/catch Simplify further optimizations by moving safe instructions to before the 'try' or 'catch' instruction. --- lib/compiler/src/beam_reorder.erl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/lib/compiler/src/beam_reorder.erl b/lib/compiler/src/beam_reorder.erl index 3230e33dbd..70adca6b04 100644 --- a/lib/compiler/src/beam_reorder.erl +++ b/lib/compiler/src/beam_reorder.erl @@ -64,6 +64,16 @@ reorder(Is) -> D = beam_utils:index_labels(Is), reorder_1(Is, D, []). +reorder_1([{Op,_,_}=TryCatch|[I|Is]=Is0], D, Acc) + when Op =:= 'catch'; Op =:= 'try' -> + %% Don't allow 'try' or 'catch' instructions to split blocks if + %% it can be avoided. + case is_safe(I) of + false -> + reorder_1(Is0, D, [TryCatch|Acc]); + true -> + reorder_1([TryCatch|Is], D, [I|Acc]) + end; reorder_1([{label,L}=I|_], D, Acc) -> Is = beam_utils:code_at(L, D), reorder_1(Is, D, [I|Acc]); @@ -111,3 +121,14 @@ reorder_1([{allocate_zero,N,Live}|Is], D, reorder_1([I|Is], D, Acc) -> reorder_1(Is, D, [I|Acc]); reorder_1([], _, Acc) -> reverse(Acc). + +%% is_safe(Instruction) -> true|false +%% Test whether an instruction is safe (cannot cause an exception). + +is_safe({kill,_}) -> true; +is_safe({move,_,_}) -> true; +is_safe({put,_}) -> true; +is_safe({put_list,_,_,_}) -> true; +is_safe({put_tuple,_,_}) -> true; +is_safe({test_heap,_,_}) -> true; +is_safe(_) -> false. -- cgit v1.2.3 From 30cc5c902d9e653d48452a7f84fcd664cfc3f0a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 5 Aug 2015 19:19:56 +0200 Subject: Put 'try' in blocks to optimize allocation instructions Put 'try' instructions inside block to improve the optimization of allocation instructions. Currently, the compiler only looks at initialization of y registers inside blocks when determining which y registers that will be "naturally" initialized. --- lib/compiler/src/beam_block.erl | 7 +++++-- lib/compiler/src/beam_jump.erl | 2 +- lib/compiler/src/beam_split.erl | 4 ++-- lib/compiler/src/beam_type.erl | 4 ++-- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 49e66b379b..0b284299be 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -149,7 +149,10 @@ collect({put_map,F,Op,S,D,R,{list,Puts}}) -> collect({get_map_elements,F,S,{list,Gets}}) -> {Ss,Ds} = beam_utils:split_even(Gets), {set,Ds,[S|Ss],{get_map_elements,F}}; -collect({'catch',R,L}) -> {set,[R],[],{'catch',L}}; +collect({'catch'=Op,R,L}) -> + {set,[R],[],{try_catch,Op,L}}; +collect({'try'=Op,R,L}) -> + {set,[R],[],{try_catch,Op,L}}; collect(fclearerror) -> {set,[],[],fclearerror}; collect({fcheckerror,{f,0}}) -> {set,[],[],fcheckerror}; collect({fmove,S,D}) -> {set,[D],[S],fmove}; @@ -346,7 +349,7 @@ opt_tuple_element([]) -> []. opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) -> no; -opt_tuple_element_1([{set,_,_,{'catch',_}}|_], _, _, _) -> +opt_tuple_element_1([{set,_,_,{try_catch,_,_}}|_], _, _, _) -> no; opt_tuple_element_1([{set,[D],[S],move}|Is0], I0, {_,S}, Acc) -> case eliminate_use_of_from_reg(Is0, S, D, []) of diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl index 5e58e0f6ac..3b6eb19fe8 100644 --- a/lib/compiler/src/beam_jump.erl +++ b/lib/compiler/src/beam_jump.erl @@ -495,7 +495,7 @@ is_label_used_in_block({set,_,_,Info}, Lbl) -> {alloc,_,{gc_bif,_,{f,F}}} -> F =:= Lbl; {alloc,_,{put_map,_,{f,F}}} -> F =:= Lbl; {get_map_elements,{f,F}} -> F =:= Lbl; - {'catch',{f,F}} -> F =:= Lbl; + {try_catch,_,{f,F}} -> F =:= Lbl; {alloc,_,_} -> false; {put_tuple,_} -> false; {get_tuple_element,_} -> false; diff --git a/lib/compiler/src/beam_split.erl b/lib/compiler/src/beam_split.erl index 3be9311080..bb1c0e23a9 100644 --- a/lib/compiler/src/beam_split.erl +++ b/lib/compiler/src/beam_split.erl @@ -57,8 +57,8 @@ split_block([{set,[D],[S|Puts],{alloc,R,{put_map,Op,{f,Lbl}=Fail}}}|Is], split_block([{set,Ds,[S|Ss],{get_map_elements,Fail}}|Is], Bl, Acc) -> Gets = beam_utils:join_even(Ss,Ds), split_block(Is, [], [{get_map_elements,Fail,S,{list,Gets}}|make_block(Bl, Acc)]); -split_block([{set,[R],[],{'catch',L}}|Is], Bl, Acc) -> - split_block(Is, [], [{'catch',R,L}|make_block(Bl, Acc)]); +split_block([{set,[R],[],{try_catch,Op,L}}|Is], Bl, Acc) -> + split_block(Is, [], [{Op,R,L}|make_block(Bl, Acc)]); split_block([{set,[],[],{line,_}=Line}|Is], Bl, Acc) -> split_block(Is, [], [Line|make_block(Bl, Acc)]); split_block([I|Is], Bl, Acc) -> diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl index 5298589f83..40d67d1670 100644 --- a/lib/compiler/src/beam_type.erl +++ b/lib/compiler/src/beam_type.erl @@ -92,7 +92,7 @@ simplify_basic_1([{set,[D],[TupleReg],{get_tuple_element,0}}=I|Is0], Ts0, Acc) - Ts = update(I, Ts0), simplify_basic_1(Is0, Ts, [I|Acc]) end; -simplify_basic_1([{set,_,_,{'catch',_}}=I|Is], _Ts, Acc) -> +simplify_basic_1([{set,_,_,{try_catch,_,_}}=I|Is], _Ts, Acc) -> simplify_basic_1(Is, tdb_new(), [I|Acc]); simplify_basic_1([{test,is_tuple,_,[R]}=I|Is], Ts, Acc) -> case tdb_find(R, Ts) of @@ -199,7 +199,7 @@ simplify_float_1([{set,[D0],[A0,B0],{alloc,_,{gc_bif,Op0,{f,0}}}}=I|Is]=Is0, Ts = tdb_update([{D0,float}], Ts0), simplify_float_1(Is, Ts, Rs, Acc) end; -simplify_float_1([{set,_,_,{'catch',_}}=I|Is]=Is0, _Ts, Rs0, Acc0) -> +simplify_float_1([{set,_,_,{try_catch,_,_}}=I|Is]=Is0, _Ts, Rs0, Acc0) -> Acc = flush_all(Rs0, Is0, Acc0), simplify_float_1(Is, tdb_new(), Rs0, [I|Acc]); simplify_float_1([{set,_,_,{line,_}}=I|Is], Ts, Rs, Acc) -> -- cgit v1.2.3 From f63b503f9c7ea4d5824899dc5b287075e261e6a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 10 Aug 2015 14:27:50 +0200 Subject: Move rewriting of select_val to is_boolean from beam_peep to beam_dead We can rewrite more instances of select_val to is_boolean because it is not necessary that a particular label follows the select_val. --- lib/compiler/src/beam_dead.erl | 24 +++++++++++++++++++++--- lib/compiler/src/beam_peep.erl | 28 ---------------------------- 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl index ead88b57e9..0cb5040177 100644 --- a/lib/compiler/src/beam_dead.erl +++ b/lib/compiler/src/beam_dead.erl @@ -242,8 +242,15 @@ backward([{select,select_val,Reg,{f,Fail0},List0}|Is], D, Acc) -> List = shortcut_select_list(List0, Reg, D, []), Fail1 = shortcut_label(Fail0, D), Fail = shortcut_bs_test(Fail1, Is, D), - Sel = {select,select_val,Reg,{f,Fail},List}, - backward(Is, D, [Sel|Acc]); + case List of + [{atom,B1},F,{atom,B2},F] when B1 =:= not B2 -> + Test = {test,is_boolean,{f,Fail},[Reg]}, + Jump = {jump,F}, + backward([Jump,Test|Is], D, Acc); + [_|_] -> + Sel = {select,select_val,Reg,{f,Fail},List}, + backward(Is, D, [Sel|Acc]) + end; backward([{jump,{f,To0}},{move,Src,Reg}=Move|Is], D, Acc) -> To = shortcut_select_label(To0, Reg, Src, D), Jump = {jump,{f,To}}, @@ -295,7 +302,18 @@ backward([{test,Op,{f,To0},Ops0}|Is], D, Acc) -> is_eq_exact -> combine_eqs(To, Ops0, D, Acc); _ -> {test,Op,{f,To},Ops0} end, - backward(Is, D, [I|Acc]); + case {I,Acc} of + {{test,is_atom,Fail,Ops0},[{test,is_boolean,Fail,Ops0}|_]} -> + %% An is_atom test before an is_boolean test (with the + %% same failure label) is redundant. + backward(Is, D, Acc); + {{test,_,_,_},_} -> + %% Still a test instruction. Done. + backward(Is, D, [I|Acc]); + {_,_} -> + %% Rewritten to a select_val. Rescan. + backward([I|Is], D, Acc) + end; backward([{test,Op,{f,To0},Live,Ops0,Dst}|Is], D, Acc) -> To1 = shortcut_bs_test(To0, Is, D), To2 = shortcut_label(To1, D), diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl index 17fd2e502a..75be86b83f 100644 --- a/lib/compiler/src/beam_peep.erl +++ b/lib/compiler/src/beam_peep.erl @@ -65,18 +65,6 @@ function({function,Name,Arity,CLabel,Is0}) -> %% InEncoding =:= latin1, OutEncoding =:= unicode; %% InEncoding =:= latin1, OutEncoding =:= utf8 -> %% -%% (2) A select_val/4 instruction that only verifies that -%% its argument is either 'true' or 'false' can be -%% be replaced with an is_boolean/2 instruction. That is: -%% -%% select_val Reg Fail [ true Next false Next ] -%% Next: ... -%% -%% can be rewritten to -%% -%% is_boolean Fail Reg -%% Next: ... -%% peep(Is) -> peep(Is, gb_sets:empty(), []). @@ -95,12 +83,6 @@ peep([{gc_bif,_,_,_,_,Dst}=I|Is], SeenTests0, Acc) -> %% Kill all remembered tests that depend on the destination register. SeenTests = kill_seen(Dst, SeenTests0), peep(Is, SeenTests, [I|Acc]); -peep([{test,is_boolean,{f,Fail},Ops}|_]=Is, SeenTests, - [{test,is_atom,{f,Fail},Ops}|Acc]) -> - %% The previous is_atom/2 test (with the same failure label) is redundant. - %% (If is_boolean(Src) is true, is_atom(Src) is also true, so it is - %% OK to still remember that we have seen is_atom/1.) - peep(Is, SeenTests, Acc); peep([{test,Op,_,Ops}=I|Is], SeenTests0, Acc) -> case beam_utils:is_pure_test(I) of false -> @@ -121,16 +103,6 @@ peep([{test,Op,_,Ops}=I|Is], SeenTests0, Acc) -> peep(Is, SeenTests, [I|Acc]) end end; -peep([{select,select_val,Src,Fail, - [{atom,false},{f,L},{atom,true},{f,L}]}| - [{label,L}|_]=Is], SeenTests, Acc) -> - I = {test,is_boolean,Fail,[Src]}, - peep([I|Is], SeenTests, Acc); -peep([{select,select_val,Src,Fail, - [{atom,true},{f,L},{atom,false},{f,L}]}| - [{label,L}|_]=Is], SeenTests, Acc) -> - I = {test,is_boolean,Fail,[Src]}, - peep([I|Is], SeenTests, Acc); peep([I|Is], _, Acc) -> %% An unknown instruction. Throw away all information we %% have collected about test instructions. -- cgit v1.2.3 From e92ad3c4d6358cecbba7643ccc14610957ad79ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 10 Aug 2015 06:25:00 +0200 Subject: v3_core: Improve code generation for guards When translating guards to Core Erlang, it is sometimes necessary to add an is_boolean/1 guard test. Here is an example when it is necessary: o(A, B) when A or B -> ok. That would be translated to something like: o(A, B) when ((A =:= true) or (B =:= true)) and is_boolean(A) and is_boolean(B) -> ok. The is_boolean/1 tests are necessary to ensure that the guard fails for calls such as: o(true, not_boolean) However, because of a bug in v3_core, is_boolean/1 tests were added when they were not necessary. Here is an example: f(B) when not B -> ok. That would be translated to: f(B) when (B =:= false) and is_boolean(B) -> ok. The following translation will work just as well. f(B) when B =:= false -> ok. Correct the bug to suppress those unnecessary is_boolean/1 tests. --- lib/compiler/src/v3_core.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl index 0941ad5dd5..7c229210a0 100644 --- a/lib/compiler/src/v3_core.erl +++ b/lib/compiler/src/v3_core.erl @@ -469,7 +469,8 @@ unforce_tree([#iset{var=#c_var{name=V},arg=Arg0}|Es], D0) -> unforce_tree(Es, D); unforce_tree([#icall{}=Call], D) -> unforce_tree_subst(Call, D); -unforce_tree([Top], _) -> Top. +unforce_tree([#c_var{name=V}], D) -> + gb_trees:get(V, D). unforce_tree_subst(#icall{module=#c_literal{val=erlang}, name=#c_literal{val='=:='}, -- cgit v1.2.3 From 93c5e457faeccfd8ccbb1e6c587ad6df1f200408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 21 Aug 2015 07:07:49 +0200 Subject: beam_validator: Don't allow x(1023) to be used In 45f469ca0890, the BEAM loader started to use x(1023) as scratch register for some instructions. Therefore we should not allow x(1023) to be used in code emitted by the compiler. --- lib/compiler/src/beam_block.erl | 1 - lib/compiler/src/beam_bool.erl | 2 -- lib/compiler/src/beam_clean.erl | 2 +- lib/compiler/src/beam_validator.erl | 31 +++++++++++++--------- lib/compiler/test/beam_validator_SUITE.erl | 6 ++--- .../test/beam_validator_SUITE_data/xrange.S | 4 +-- 6 files changed, 24 insertions(+), 22 deletions(-) diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 0b284299be..4e179daa0e 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -24,7 +24,6 @@ -export([module/2]). -import(lists, [mapfoldl/3,reverse/1,reverse/2,foldl/3,member/2]). --define(MAXREG, 1024). module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) -> {Fs,Lc} = mapfoldl(fun function/2, Lc0, Fs0), diff --git a/lib/compiler/src/beam_bool.erl b/lib/compiler/src/beam_bool.erl index 14b6381230..c9e103eae9 100644 --- a/lib/compiler/src/beam_bool.erl +++ b/lib/compiler/src/beam_bool.erl @@ -25,8 +25,6 @@ -import(lists, [reverse/1,reverse/2,foldl/3,mapfoldl/3,map/2]). --define(MAXREG, 1024). - -record(st, {next, %Next label number. ll %Live regs at labels. diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl index 919ee3ee7d..7d66985791 100644 --- a/lib/compiler/src/beam_clean.erl +++ b/lib/compiler/src/beam_clean.erl @@ -141,7 +141,7 @@ renumber_labels([{bif,is_record,{f,_}, renumber_labels(Is, Acc, St); renumber_labels([{test,is_record,{f,_}=Fail, [Term,{atom,Tag}=TagAtom,{integer,Arity}]}|Is0], Acc, St) -> - Tmp = {x,1023}, + Tmp = {x,1022}, Is = case is_record_tuple(Term, Tag, Arity) of yes -> Is0; diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index 942d69a756..ab5fedf3bf 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -31,8 +31,6 @@ -import(lists, [reverse/1,foldl/3,foreach/2,dropwhile/2]). --define(MAXREG, 1024). - %%-define(DEBUG, 1). -ifdef(DEBUG). -define(DBG_FORMAT(F, D), (io:format((F), (D)))). @@ -970,9 +968,9 @@ get_fls(#vst{current=#st{fls=Fls}}) when is_atom(Fls) -> Fls. init_fregs() -> 0. -set_freg({fr,Fr}, #vst{current=#st{f=Fregs0}=St}=Vst) +set_freg({fr,Fr}=Freg, #vst{current=#st{f=Fregs0}=St}=Vst) when is_integer(Fr), 0 =< Fr -> - limit_check(Fr), + check_limit(Freg), Bit = 1 bsl Fr, if Fregs0 band Bit =:= 0 -> @@ -985,9 +983,10 @@ set_freg(Fr, _) -> error({bad_target,Fr}). assert_freg_set({fr,Fr}=Freg, #vst{current=#st{f=Fregs}}) when is_integer(Fr), 0 =< Fr -> if - Fregs band (1 bsl Fr) =/= 0 -> - limit_check(Fr); - true -> error({uninitialized_reg,Freg}) + (Fregs bsr Fr) band 1 =:= 0 -> + error({uninitialized_reg,Freg}); + true -> + ok end; assert_freg_set(Fr, _) -> error({bad_source,Fr}). @@ -1066,16 +1065,16 @@ set_type(Type, {x,_}=Reg, Vst) -> set_type_reg(Type, Reg, Vst); set_type(Type, {y,_}=Reg, Vst) -> set_type_y(Type, Reg, Vst); set_type(_, _, #vst{}=Vst) -> Vst. -set_type_reg(Type, {x,X}, #vst{current=#st{x=Xs}=St}=Vst) +set_type_reg(Type, {x,X}=Reg, #vst{current=#st{x=Xs}=St}=Vst) when is_integer(X), 0 =< X -> - limit_check(X), + check_limit(Reg), Vst#vst{current=St#st{x=gb_trees:enter(X, Type, Xs)}}; set_type_reg(Type, Reg, Vst) -> set_type_y(Type, Reg, Vst). set_type_y(Type, {y,Y}=Reg, #vst{current=#st{y=Ys0}=St}=Vst) when is_integer(Y), 0 =< Y -> - limit_check(Y), + check_limit(Reg), Ys = case gb_trees:lookup(Y, Ys0) of none -> error({invalid_store,Reg,Type}); @@ -1591,9 +1590,15 @@ return_type_math(pow, 2) -> {float,[]}; return_type_math(pi, 0) -> {float,[]}; return_type_math(F, A) when is_atom(F), is_integer(A), A >= 0 -> term. -limit_check(Num) when is_integer(Num), Num >= ?MAXREG -> - error(limit); -limit_check(_) -> ok. +check_limit({x,X}) when is_integer(X), X < 1023 -> + %% Note: x(1023) is reserved for use by the BEAM loader. + ok; +check_limit({y,Y}) when is_integer(Y), Y < 1024 -> + ok; +check_limit({fr,Fr}) when is_integer(Fr), Fr < 1024 -> + ok; +check_limit(_) -> + error(limit). min(A, B) when is_integer(A), is_integer(B), A < B -> A; min(A, B) when is_integer(A), is_integer(B) -> B. diff --git a/lib/compiler/test/beam_validator_SUITE.erl b/lib/compiler/test/beam_validator_SUITE.erl index 69391b15eb..d6deb4a730 100644 --- a/lib/compiler/test/beam_validator_SUITE.erl +++ b/lib/compiler/test/beam_validator_SUITE.erl @@ -107,13 +107,13 @@ xrange(Config) when is_list(Config) -> {{bif,'+',{f,0},[{x,-1},{x,1}],{x,0}},4, {uninitialized_reg,{x,-1}}}}, {{t,sum_2,2}, - {{bif,'+',{f,0},[{x,0},{x,1024}],{x,0}},4, - {uninitialized_reg,{x,1024}}}}, + {{bif,'+',{f,0},[{x,0},{x,1023}],{x,0}},4, + {uninitialized_reg,{x,1023}}}}, {{t,sum_3,2}, {{bif,'+',{f,0},[{x,0},{x,1}],{x,-1}},4, {invalid_store,{x,-1},number}}}, {{t,sum_4,2}, - {{bif,'+',{f,0},[{x,0},{x,1}],{x,1024}},4,limit}}] = Errors, + {{bif,'+',{f,0},[{x,0},{x,1}],{x,1023}},4,limit}}] = Errors, ok. yrange(Config) when is_list(Config) -> diff --git a/lib/compiler/test/beam_validator_SUITE_data/xrange.S b/lib/compiler/test/beam_validator_SUITE_data/xrange.S index c6f20288f7..a76408dde3 100644 --- a/lib/compiler/test/beam_validator_SUITE_data/xrange.S +++ b/lib/compiler/test/beam_validator_SUITE_data/xrange.S @@ -20,7 +20,7 @@ {label,3}. {func_info,{atom,t},{atom,sum_2},2}. {label,4}. - {bif,'+',{f,0},[{x,0},{x,1024}],{x,0}}. + {bif,'+',{f,0},[{x,0},{x,1023}],{x,0}}. {'%live',1}. return. @@ -38,7 +38,7 @@ {label,7}. {func_info,{atom,t},{atom,sum_4},2}. {label,8}. - {bif,'+',{f,0},[{x,0},{x,1}],{x,1024}}. + {bif,'+',{f,0},[{x,0},{x,1}],{x,1023}}. {'%live',1}. return. -- cgit v1.2.3