From f3cc9d1b16d86124a70f5dd0609113ec6e3b4dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 5 Aug 2015 19:35:45 +0200 Subject: beam_block: Eliminate redundant wasteful call to opt/1 opt_alloc/1 makes a redundant call to opt/1. It is redundant because the opt/1 function has already been applied to the instruction sequence prior to calling opt_alloc/1. --- lib/compiler/src/beam_block.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/compiler/src/beam_block.erl') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 2def3de7f3..4861a2d7ec 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -346,7 +346,7 @@ is_transparent(_, _) -> false. %% Optimises all allocate instructions. opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) -> - [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|opt(Is)]; + [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|Is]; opt_alloc([I|Is]) -> [I|opt_alloc(Is)]; opt_alloc([]) -> []. -- cgit v1.2.3 From 7a47b20c3accf323bdbf7bcf9c86fbf8b2c18e20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 22 Jul 2015 09:45:04 +0200 Subject: beam_block: Clean up optimization of move optimizations The 'move' optimization was relatively clean until GC BIFs were introduced. Instead of re-thinking the implementation, the existing code was fixed and patched. The current code unsuccessfully attempts to eliminate 'move' instructions across GC BIF and allocation instructions. We can simplify the code if we give up as soon as we encounter any instruction that allocates. --- lib/compiler/src/beam_block.erl | 83 ++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 46 deletions(-) (limited to 'lib/compiler/src/beam_block.erl') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 4861a2d7ec..58e8f77a2c 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -287,60 +287,51 @@ opt_moves(Ds, Is) -> %% If there is a {move,Dest,FinalDest} instruction %% in the instruction stream, remove the move instruction %% and let FinalDest be the destination. -%% -%% For this optimization to be safe, we must be sure that -%% Dest will not be referenced in any other by other instructions -%% in the rest of the instruction stream. Not even the indirect -%% reference by an instruction that may allocate (such as -%% test_heap/2 or a GC Bif) is allowed. opt_move(Dest, Is) -> - opt_move_1(Dest, Is, ?MAXREG, []). - -opt_move_1(R, [{set,_,_,{alloc,Live,_}}|_]=Is, SafeRegs, Acc) when Live < SafeRegs -> - %% Downgrade number of safe regs and rescan the instruction, as it most probably - %% is a gc_bif instruction. - opt_move_1(R, Is, Live, Acc); -opt_move_1(R, [{set,[{x,X}=D],[R],move}|Is], SafeRegs, Acc) -> - case X < SafeRegs andalso beam_utils:is_killed_block(R, Is) of - true -> opt_move_2(D, Acc, Is); - false -> not_possible - end; -opt_move_1(R, [{set,[D],[R],move}|Is], _SafeRegs, Acc) -> + opt_move_1(Dest, Is, []). + +opt_move_1(R, [{set,[D],[R],move}|Is], Acc) -> + %% Check whether this instruction can be safely eliminated. + %% First check that the source (R) is not used by the + %% instructions that follow. case beam_utils:is_killed_block(R, Is) of - true -> opt_move_2(D, Acc, Is); + true -> opt_move_rev(D, Acc, Is); false -> not_possible end; -opt_move_1(R, [I|Is], SafeRegs, Acc) -> - case is_transparent(R, I) of - false -> not_possible; - true -> opt_move_1(R, Is, SafeRegs, [I|Acc]) - end. +opt_move_1({x,_}, [{set,_,_,{alloc,_,_}}|_], _) -> + %% The optimization is not possible. If the X register is not + %% killed by allocation, the optimization would not be safe. + %% If the X register is killed, it means that there cannot + %% follow a 'move' instruction with this X register as the + %% source. + not_possible; +opt_move_1(R, [{set,_,_,_}=I|Is], Acc) -> + %% If the source register is either killed or used by this + %% instruction, the optimimization is not possible. + case is_killed_or_used(R, I) of + true -> not_possible; + false -> opt_move_1(R, Is, [I|Acc]) + end; +opt_move_1(_, _, _) -> + not_possible. -%% Reverse the instructions, while checking that there are no instructions that -%% would interfere with using the new destination register chosen. +%% Reverse the instructions, while checking that there are no +%% instructions that would interfere with using the new destination +%% register (D). -opt_move_2(D, [I|Is], Acc) -> - case is_transparent(D, I) of - false -> not_possible; - true -> opt_move_2(D, Is, [I|Acc]) - end; -opt_move_2(D, [], Acc) -> {D,Acc}. - -%% is_transparent(Register, Instruction) -> true | false -%% Returns true if Instruction does not in any way references Register -%% (even indirectly by an allocation instruction). -%% Returns false if Instruction does reference Register, or we are -%% not sure. - -is_transparent({x,X}, {set,_,_,{alloc,Live,_}}) when X < Live -> - false; -is_transparent(R, {set,Ds,Ss,_Op}) -> - case member(R, Ds) of - true -> false; - false -> not member(R, Ss) +opt_move_rev(D, [I|Is], Acc) -> + case is_killed_or_used(D, I) of + true -> not_possible; + false -> opt_move_rev(D, Is, [I|Acc]) end; -is_transparent(_, _) -> false. +opt_move_rev(D, [], Acc) -> {D,Acc}. + +%% is_killed_or_used(Register, {set,_,_,_}) -> bool() +%% Test whether the register is used by the instruction. + +is_killed_or_used(R, {set,Ss,Ds,_}) -> + member(R, Ds) orelse member(R, Ss). %% opt_alloc(Instructions) -> Instructions' %% Optimises all allocate instructions. -- cgit v1.2.3 From 02d6135813dc133e018c161d803a642582aac36f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 22 Jul 2015 10:20:59 +0200 Subject: beam_block: Improve the move optimizations Here is an example of a move instruction that could not be optimized away because the {x,2} register was not killed: get_tuple_element Reg Pos {x,2} . . . move {x,2} {y,0} put_list {x,2} nil Any We can do the optimization if we replace all occurrences of the {x,2} register as a source with {y,0}: get_tuple_element Reg Pos {y,0} . . . put_list {y,0} nil Dst --- lib/compiler/src/beam_block.erl | 53 +++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 7 deletions(-) (limited to 'lib/compiler/src/beam_block.erl') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 58e8f77a2c..af1bf8fd08 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -291,13 +291,12 @@ opt_moves(Ds, Is) -> opt_move(Dest, Is) -> opt_move_1(Dest, Is, []). -opt_move_1(R, [{set,[D],[R],move}|Is], Acc) -> - %% Check whether this instruction can be safely eliminated. - %% First check that the source (R) is not used by the - %% instructions that follow. - case beam_utils:is_killed_block(R, Is) of - true -> opt_move_rev(D, Acc, Is); - false -> not_possible +opt_move_1(R, [{set,[D],[R],move}|Is0], Acc) -> + %% Provided that the source register is killed by instructions + %% that follow, the optimization is safe. + case eliminate_use_of_from_reg(Is0, R, D, []) of + {yes,Is} -> opt_move_rev(D, Acc, Is); + no -> not_possible end; opt_move_1({x,_}, [{set,_,_,{alloc,_,_}}|_], _) -> %% The optimization is not possible. If the X register is not @@ -333,6 +332,46 @@ opt_move_rev(D, [], Acc) -> {D,Acc}. is_killed_or_used(R, {set,Ss,Ds,_}) -> member(R, Ds) orelse member(R, Ss). +%% eliminate_use_of_from_reg([Instruction], FromRegister, ToRegister, Acc) -> +%% {yes,Is} | no +%% Eliminate any use of FromRegister in the instruction sequence +%% by replacing uses of FromRegister with ToRegister. If FromRegister +%% is referenced by an allocation instruction, return 'no' to indicate +%% that FromRegister is still used and that the optimization is not +%% possible. + +eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) -> + if + X < Live -> + no; + true -> + {yes,reverse(Acc, Is0)} + end; +eliminate_use_of_from_reg([{set,Ds,Ss0,Op}=I0|Is], From, To, Acc) -> + I = case member(From, Ss0) of + true -> + Ss = [case S of + From -> To; + _ -> S + end || S <- Ss0], + {set,Ds,Ss,Op}; + false -> + I0 + end, + case member(From, Ds) of + true -> + {yes,reverse(Acc, [I|Is])}; + false -> + eliminate_use_of_from_reg(Is, From, To, [I|Acc]) + end; +eliminate_use_of_from_reg([I]=Is, From, _To, Acc) -> + case beam_utils:is_killed_block(From, [I]) of + true -> + {yes,reverse(Acc, Is)}; + false -> + no + end. + %% opt_alloc(Instructions) -> Instructions' %% Optimises all allocate instructions. -- cgit v1.2.3 From 5f431276f1044c673c2e434e003e2f1ffddab341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 23 Jul 2015 17:43:15 +0200 Subject: Optimize get_tuple_element instructions by moving them forward --- lib/compiler/src/beam_block.erl | 53 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) (limited to 'lib/compiler/src/beam_block.erl') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index af1bf8fd08..49e66b379b 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -183,7 +183,9 @@ opt_blocks([I|Is]) -> opt_blocks([]) -> []. opt_block(Is0) -> - Is = find_fixpoint(fun opt/1, Is0), + Is = find_fixpoint(fun(Is) -> + opt_tuple_element(opt(Is)) + end, Is0), opt_alloc(Is). find_fixpoint(OptFun, Is0) -> @@ -315,6 +317,55 @@ opt_move_1(R, [{set,_,_,_}=I|Is], Acc) -> opt_move_1(_, _, _) -> not_possible. +%% opt_tuple_element([Instruction]) -> [Instruction] +%% If possible, move get_tuple_element instructions forward +%% in the instruction stream to a move instruction, eliminating +%% the move instruction. Example: +%% +%% get_tuple_element Tuple Pos Dst1 +%% ... +%% move Dst1 Dst2 +%% +%% This code may be possible to rewrite to: +%% +%% %%(Moved get_tuple_element instruction) +%% ... +%% get_tuple_element Tuple Pos Dst2 +%% + +opt_tuple_element([{set,[D],[S],{get_tuple_element,_}}=I|Is0]) -> + case opt_tuple_element_1(Is0, I, {S,D}, []) of + no -> + [I|opt_tuple_element(Is0)]; + {yes,Is} -> + opt_tuple_element(Is) + end; +opt_tuple_element([I|Is]) -> + [I|opt_tuple_element(Is)]; +opt_tuple_element([]) -> []. + +opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) -> + no; +opt_tuple_element_1([{set,_,_,{'catch',_}}|_], _, _, _) -> + no; +opt_tuple_element_1([{set,[D],[S],move}|Is0], I0, {_,S}, Acc) -> + case eliminate_use_of_from_reg(Is0, S, D, []) of + no -> + no; + {yes,Is} -> + {set,[S],Ss,Op} = I0, + I = {set,[D],Ss,Op}, + {yes,reverse(Acc, [I|Is])} + end; +opt_tuple_element_1([{set,Ds,Ss,_}=I|Is], MovedI, {S,D}=Regs, Acc) -> + case member(S, Ds) orelse member(D, Ss) of + true -> + no; + false -> + opt_tuple_element_1(Is, MovedI, Regs, [I|Acc]) + end; +opt_tuple_element_1(_, _, _, _) -> no. + %% Reverse the instructions, while checking that there are no %% instructions that would interfere with using the new destination %% register (D). -- cgit v1.2.3 From 30cc5c902d9e653d48452a7f84fcd664cfc3f0a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 5 Aug 2015 19:19:56 +0200 Subject: Put 'try' in blocks to optimize allocation instructions Put 'try' instructions inside block to improve the optimization of allocation instructions. Currently, the compiler only looks at initialization of y registers inside blocks when determining which y registers that will be "naturally" initialized. --- lib/compiler/src/beam_block.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/compiler/src/beam_block.erl') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 49e66b379b..0b284299be 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -149,7 +149,10 @@ collect({put_map,F,Op,S,D,R,{list,Puts}}) -> collect({get_map_elements,F,S,{list,Gets}}) -> {Ss,Ds} = beam_utils:split_even(Gets), {set,Ds,[S|Ss],{get_map_elements,F}}; -collect({'catch',R,L}) -> {set,[R],[],{'catch',L}}; +collect({'catch'=Op,R,L}) -> + {set,[R],[],{try_catch,Op,L}}; +collect({'try'=Op,R,L}) -> + {set,[R],[],{try_catch,Op,L}}; collect(fclearerror) -> {set,[],[],fclearerror}; collect({fcheckerror,{f,0}}) -> {set,[],[],fcheckerror}; collect({fmove,S,D}) -> {set,[D],[S],fmove}; @@ -346,7 +349,7 @@ opt_tuple_element([]) -> []. opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) -> no; -opt_tuple_element_1([{set,_,_,{'catch',_}}|_], _, _, _) -> +opt_tuple_element_1([{set,_,_,{try_catch,_,_}}|_], _, _, _) -> no; opt_tuple_element_1([{set,[D],[S],move}|Is0], I0, {_,S}, Acc) -> case eliminate_use_of_from_reg(Is0, S, D, []) of -- cgit v1.2.3 From 93c5e457faeccfd8ccbb1e6c587ad6df1f200408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 21 Aug 2015 07:07:49 +0200 Subject: beam_validator: Don't allow x(1023) to be used In 45f469ca0890, the BEAM loader started to use x(1023) as scratch register for some instructions. Therefore we should not allow x(1023) to be used in code emitted by the compiler. --- lib/compiler/src/beam_block.erl | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/compiler/src/beam_block.erl') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 0b284299be..4e179daa0e 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -24,7 +24,6 @@ -export([module/2]). -import(lists, [mapfoldl/3,reverse/1,reverse/2,foldl/3,member/2]). --define(MAXREG, 1024). module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) -> {Fs,Lc} = mapfoldl(fun function/2, Lc0, Fs0), -- cgit v1.2.3 From 9a1f7c4156093effddd555e88fd16a2ffd6ae75a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 15 Sep 2015 17:23:19 +0200 Subject: Regain full coverage of beam_block d0784035ab fixed a problem with register corruption. Because of that, opt_moves/2 will never be asked to optimize instructions with more than two destination registers. Therefore, to regain full coverage of beam_block, remove the final clause in opt_moves/2. --- lib/compiler/src/beam_block.erl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'lib/compiler/src/beam_block.erl') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 75202d9379..ebf9b5fec5 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -283,11 +283,7 @@ opt_moves([X0,Y0], Is0) -> not_possible -> {[X,Y0],Is2}; {X,_} -> {[X,Y0],Is2}; {Y,Is} -> {[X,Y],Is} - end; -opt_moves(Ds, Is) -> - %% multiple destinations -> pass through - {Ds,Is}. - + end. %% opt_move(Dest, [Instruction]) -> {UpdatedDest,[Instruction]} | not_possible %% If there is a {move,Dest,FinalDest} instruction -- cgit v1.2.3 From 8372f56a06ef24cf4455a54e15d876ab6ca8c570 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 21 Sep 2015 14:27:30 +0200 Subject: Move out bit syntax optimizations from beam_block In the future we might want to add more bit syntax optimizations, but beam_block is already sufficiently complicated. Therefore, move the bit syntax optimizations out of beam_block into a separate compiler pass called beam_bs. --- lib/compiler/src/beam_block.erl | 261 ++-------------------------------------- 1 file changed, 10 insertions(+), 251 deletions(-) (limited to 'lib/compiler/src/beam_block.erl') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index ebf9b5fec5..741fdbb973 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -23,13 +23,13 @@ -module(beam_block). -export([module/2]). --import(lists, [mapfoldl/3,reverse/1,reverse/2,foldl/3,member/2]). +-import(lists, [reverse/1,reverse/2,foldl/3,member/2]). -module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) -> - {Fs,Lc} = mapfoldl(fun function/2, Lc0, Fs0), +module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> + Fs = [function(F) || F <- Fs0], {ok,{Mod,Exp,Attr,Fs,Lc}}. -function({function,Name,Arity,CLabel,Is0}, Lc0) -> +function({function,Name,Arity,CLabel,Is0}) -> try %% Collect basic blocks and optimize them. Is1 = blockify(Is0), @@ -39,11 +39,8 @@ function({function,Name,Arity,CLabel,Is0}, Lc0) -> Is5 = opt_blocks(Is4), Is6 = beam_utils:delete_live_annos(Is5), - %% Optimize bit syntax. - {Is,Lc} = bsm_opt(Is6, Lc0), - %% Done. - {{function,Name,Arity,CLabel,Is},Lc} + {function,Name,Arity,CLabel,Is6} catch Class:Error -> Stack = erlang:get_stacktrace(), @@ -89,18 +86,11 @@ blockify([{test,is_atom,{f,Fail},[Reg]}=I| {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc]) end; blockify([I|Is0]=IsAll, Acc) -> - case is_bs_put(I) of - true -> - {BsPuts0,Is} = collect_bs_puts(IsAll), - BsPuts = opt_bs_puts(BsPuts0), - blockify(Is, reverse(BsPuts, Acc)); - false -> - case collect(I) of - error -> blockify(Is0, [I|Acc]); - Instr when is_tuple(Instr) -> - {Block,Is} = collect_block(IsAll), - blockify(Is, [{block,Block}|Acc]) - end + case collect(I) of + error -> blockify(Is0, [I|Acc]); + Instr when is_tuple(Instr) -> + {Block,Is} = collect_block(IsAll), + blockify(Is, [{block,Block}|Acc]) end; blockify([], Acc) -> reverse(Acc). @@ -493,234 +483,3 @@ x_dead([], Regs) -> Regs. x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N)); x_live([_|Rs], Regs) -> x_live(Rs, Regs); x_live([], Regs) -> Regs. - -%%% -%%% Evaluation of constant bit fields. -%%% - -is_bs_put({bs_put,_,{bs_put_integer,_,_},_}) -> true; -is_bs_put({bs_put,_,{bs_put_float,_,_},_}) -> true; -is_bs_put(_) -> false. - -collect_bs_puts(Is) -> - collect_bs_puts_1(Is, []). - -collect_bs_puts_1([I|Is]=Is0, Acc) -> - case is_bs_put(I) of - false -> {reverse(Acc),Is0}; - true -> collect_bs_puts_1(Is, [I|Acc]) - end. - -opt_bs_puts(Is) -> - opt_bs_1(Is, []). - -opt_bs_1([{bs_put,Fail, - {bs_put_float,1,Flags0},[{integer,Sz},Src]}=I0|Is], Acc) -> - try eval_put_float(Src, Sz, Flags0) of - <> -> - Flags = force_big(Flags0), - I = {bs_put,Fail,{bs_put_integer,1,Flags}, - [{integer,Sz},{integer,Int}]}, - opt_bs_1([I|Is], Acc) - catch - error:_ -> - opt_bs_1(Is, [I0|Acc]) - end; -opt_bs_1([{bs_put,_,{bs_put_integer,1,_},[{integer,8},{integer,_}]}|_]=IsAll, - Acc0) -> - {Is,Acc} = bs_collect_string(IsAll, Acc0), - opt_bs_1(Is, Acc); -opt_bs_1([{bs_put,Fail,{bs_put_integer,1,F},[{integer,Sz},{integer,N}]}=I|Is0], - Acc) when Sz > 8 -> - case field_endian(F) of - big -> - %% We can do this optimization for any field size without risk - %% for code explosion. - case bs_split_int(N, Sz, Fail, Is0) of - no_split -> opt_bs_1(Is0, [I|Acc]); - Is -> opt_bs_1(Is, Acc) - end; - little when Sz < 128 -> - %% We only try to optimize relatively small fields, to avoid - %% an explosion in code size. - <> = <>, - Flags = force_big(F), - Is = [{bs_put,Fail,{bs_put_integer,1,Flags}, - [{integer,Sz},{integer,Int}]}|Is0], - opt_bs_1(Is, Acc); - _ -> %native or too wide little field - opt_bs_1(Is0, [I|Acc]) - end; -opt_bs_1([{bs_put,Fail,{Op,U,F},[{integer,Sz},Src]}|Is], Acc) when U > 1 -> - opt_bs_1([{bs_put,Fail,{Op,1,F},[{integer,U*Sz},Src]}|Is], Acc); -opt_bs_1([I|Is], Acc) -> - opt_bs_1(Is, [I|Acc]); -opt_bs_1([], Acc) -> reverse(Acc). - -eval_put_float(Src, Sz, Flags) when Sz =< 256 -> %Only evaluate if Sz is reasonable. - Val = value(Src), - case field_endian(Flags) of - little -> <>; - big -> <> - %% native intentionally not handled here - we can't optimize it. - end. - -value({integer,I}) -> I; -value({float,F}) -> F. - -bs_collect_string(Is, [{bs_put,_,{bs_put_string,Len,{string,Str}},[]}|Acc]) -> - bs_coll_str_1(Is, Len, reverse(Str), Acc); -bs_collect_string(Is, Acc) -> - bs_coll_str_1(Is, 0, [], Acc). - -bs_coll_str_1([{bs_put,_,{bs_put_integer,U,_},[{integer,Sz},{integer,V}]}|Is], - Len, StrAcc, IsAcc) when U*Sz =:= 8 -> - Byte = V band 16#FF, - bs_coll_str_1(Is, Len+1, [Byte|StrAcc], IsAcc); -bs_coll_str_1(Is, Len, StrAcc, IsAcc) -> - {Is,[{bs_put,{f,0},{bs_put_string,Len,{string,reverse(StrAcc)}},[]}|IsAcc]}. - -field_endian({field_flags,F}) -> field_endian_1(F). - -field_endian_1([big=E|_]) -> E; -field_endian_1([little=E|_]) -> E; -field_endian_1([native=E|_]) -> E; -field_endian_1([_|Fs]) -> field_endian_1(Fs). - -force_big({field_flags,F}) -> - {field_flags,force_big_1(F)}. - -force_big_1([big|_]=Fs) -> Fs; -force_big_1([little|Fs]) -> [big|Fs]; -force_big_1([F|Fs]) -> [F|force_big_1(Fs)]. - -bs_split_int(0, Sz, _, _) when Sz > 64 -> - %% We don't want to split in this case because the - %% string will consist of only zeroes. - no_split; -bs_split_int(-1, Sz, _, _) when Sz > 64 -> - %% We don't want to split in this case because the - %% string will consist of only 255 bytes. - no_split; -bs_split_int(N, Sz, Fail, Acc) -> - FirstByteSz = case Sz rem 8 of - 0 -> 8; - Rem -> Rem - end, - bs_split_int_1(N, FirstByteSz, Sz, Fail, Acc). - -bs_split_int_1(-1, _, Sz, Fail, Acc) when Sz > 64 -> - I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}}, - [{integer,Sz},{integer,-1}]}, - [I|Acc]; -bs_split_int_1(0, _, Sz, Fail, Acc) when Sz > 64 -> - I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}}, - [{integer,Sz},{integer,0}]}, - [I|Acc]; -bs_split_int_1(N, ByteSz, Sz, Fail, Acc) when Sz > 0 -> - Mask = (1 bsl ByteSz) - 1, - I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}}, - [{integer,ByteSz},{integer,N band Mask}]}, - bs_split_int_1(N bsr ByteSz, 8, Sz-ByteSz, Fail, [I|Acc]); -bs_split_int_1(_, _, _, _, Acc) -> Acc. - - -%%% -%%% Optimization of new bit syntax matching: get rid -%%% of redundant bs_restore2/2 instructions across select_val -%%% instructions, as well as a few other simple peep-hole optimizations. -%%% - -bsm_opt(Is0, Lc0) -> - {Is1,D0,Lc} = bsm_scan(Is0, [], Lc0, []), - Is2 = case D0 of - [] -> - Is1; - _ -> - D = gb_trees:from_orddict(orddict:from_list(D0)), - bsm_reroute(Is1, D, none, []) - end, - Is = beam_clean:bs_clean_saves(Is2), - {bsm_opt_2(Is, []),Lc}. - -bsm_scan([{label,L}=Lbl,{bs_restore2,_,Save}=R|Is], D0, Lc, Acc0) -> - D = [{{L,Save},Lc}|D0], - Acc = [{label,Lc},R,Lbl|Acc0], - bsm_scan(Is, D, Lc+1, Acc); -bsm_scan([I|Is], D, Lc, Acc) -> - bsm_scan(Is, D, Lc, [I|Acc]); -bsm_scan([], D, Lc, Acc) -> - {reverse(Acc),D,Lc}. - -bsm_reroute([{bs_save2,Reg,Save}=I|Is], D, _, Acc) -> - bsm_reroute(Is, D, {Reg,Save}, [I|Acc]); -bsm_reroute([{bs_restore2,Reg,Save}=I|Is], D, _, Acc) -> - bsm_reroute(Is, D, {Reg,Save}, [I|Acc]); -bsm_reroute([{label,_}=I|Is], D, S, Acc) -> - bsm_reroute(Is, D, S, [I|Acc]); -bsm_reroute([{select,select_val,Reg,F0,Lbls0}|Is], D, {_,Save}=S, Acc0) -> - [F|Lbls] = bsm_subst_labels([F0|Lbls0], Save, D), - Acc = [{select,select_val,Reg,F,Lbls}|Acc0], - bsm_reroute(Is, D, S, Acc); -bsm_reroute([{test,TestOp,F0,TestArgs}=I|Is], D, {_,Save}=S, Acc0) -> - F = bsm_subst_label(F0, Save, D), - Acc = [{test,TestOp,F,TestArgs}|Acc0], - case bsm_not_bs_test(I) of - true -> - %% The test instruction will not update the bit offset for the - %% binary being matched. Therefore the save position can be kept. - bsm_reroute(Is, D, S, Acc); - false -> - %% The test instruction might update the bit offset. Kill our - %% remembered Save position. - bsm_reroute(Is, D, none, Acc) - end; -bsm_reroute([{test,TestOp,F0,Live,TestArgs,Dst}|Is], D, {_,Save}, Acc0) -> - F = bsm_subst_label(F0, Save, D), - Acc = [{test,TestOp,F,Live,TestArgs,Dst}|Acc0], - %% The test instruction will update the bit offset. Kill our - %% remembered Save position. - bsm_reroute(Is, D, none, Acc); -bsm_reroute([{block,[{set,[],[],{alloc,_,_}}]}=Bl, - {bs_context_to_binary,_}=I|Is], D, S, Acc) -> - %% To help further bit syntax optimizations. - bsm_reroute([I,Bl|Is], D, S, Acc); -bsm_reroute([I|Is], D, _, Acc) -> - bsm_reroute(Is, D, none, [I|Acc]); -bsm_reroute([], _, _, Acc) -> reverse(Acc). - -bsm_opt_2([{test,bs_test_tail2,F,[Ctx,Bits]}|Is], - [{test,bs_skip_bits2,F,[Ctx,{integer,I},Unit,_Flags]}|Acc]) -> - bsm_opt_2(Is, [{test,bs_test_tail2,F,[Ctx,Bits+I*Unit]}|Acc]); -bsm_opt_2([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,_]}|Is], - [{test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,Flags]}|Acc]) -> - bsm_opt_2(Is, [{test,bs_skip_bits2,F, - [Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]); -bsm_opt_2([I|Is], Acc) -> - bsm_opt_2(Is, [I|Acc]); -bsm_opt_2([], Acc) -> reverse(Acc). - -%% bsm_not_bs_test({test,Name,_,Operands}) -> true|false. -%% Test whether is the test is a "safe", i.e. does not move the -%% bit offset for a binary. -%% -%% 'true' means that the test is safe, 'false' that we don't know or -%% that the test moves the offset (e.g. bs_get_integer2). - -bsm_not_bs_test({test,bs_test_tail2,_,[_,_]}) -> true; -bsm_not_bs_test(Test) -> beam_utils:is_pure_test(Test). - -bsm_subst_labels(Fs, Save, D) -> - bsm_subst_labels_1(Fs, Save, D, []). - -bsm_subst_labels_1([F|Fs], Save, D, Acc) -> - bsm_subst_labels_1(Fs, Save, D, [bsm_subst_label(F, Save, D)|Acc]); -bsm_subst_labels_1([], _, _, Acc) -> - reverse(Acc). - -bsm_subst_label({f,Lbl0}=F, Save, D) -> - case gb_trees:lookup({Lbl0,Save}, D) of - {value,Lbl} -> {f,Lbl}; - none -> F - end; -bsm_subst_label(Other, _, _) -> Other. -- cgit v1.2.3 From b66193afc3fe85072da4631c52c5ccec136caa05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 23 Sep 2015 12:30:16 +0200 Subject: beam_type: Improve optimizations by keeping track of booleans There is an optimization in beam_block to simplify a select_val on a known boolean value. We can implement this optimization in a cleaner way in beam_type and it will also be applicable in more situations. (When I added the optimization to beam_type without removing the optimization from beam_block, the optimization was applied 66 times.) --- lib/compiler/src/beam_block.erl | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'lib/compiler/src/beam_block.erl') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 741fdbb973..10dbaf462c 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -58,33 +58,6 @@ blockify(Is) -> blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) -> %% Useless instruction sequence. blockify(Is, Acc); -blockify([{test,is_atom,{f,Fail},[Reg]}=I| - [{select,select_val,Reg,{f,Fail}, - [{atom,false},{f,_}=BrFalse, - {atom,true}=AtomTrue,{f,_}=BrTrue]}|Is]=Is0], - [{block,Bl}|_]=Acc) -> - case is_last_bool(Bl, Reg) of - false -> - blockify(Is0, [I|Acc]); - true -> - %% The last instruction is a boolean operator/guard BIF that can't fail. - %% We can convert the three-way branch to a two-way branch (eliminating - %% the reference to the failure label). - blockify(Is, [{jump,BrTrue}, - {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc]) - end; -blockify([{test,is_atom,{f,Fail},[Reg]}=I| - [{select,select_val,Reg,{f,Fail}, - [{atom,true}=AtomTrue,{f,_}=BrTrue, - {atom,false},{f,_}=BrFalse]}|Is]=Is0], - [{block,Bl}|_]=Acc) -> - case is_last_bool(Bl, Reg) of - false -> - blockify(Is0, [I|Acc]); - true -> - blockify(Is, [{jump,BrTrue}, - {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc]) - end; blockify([I|Is0]=IsAll, Acc) -> case collect(I) of error -> blockify(Is0, [I|Acc]); @@ -94,13 +67,6 @@ blockify([I|Is0]=IsAll, Acc) -> end; blockify([], Acc) -> reverse(Acc). -is_last_bool([{set,[Reg],As,{bif,N,_}}], Reg) -> - Ar = length(As), - erl_internal:new_type_test(N, Ar) orelse erl_internal:comp_op(N, Ar) - orelse erl_internal:bool_op(N, Ar); -is_last_bool([_|Is], Reg) -> is_last_bool(Is, Reg); -is_last_bool([], _) -> false. - collect_block(Is) -> collect_block(Is, []). -- cgit v1.2.3