author     Erlang/OTP <[email protected]>    2009-11-20 14:54:40 +0000
committer  Erlang/OTP <[email protected]>    2009-11-20 14:54:40 +0000
commit     84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree       bff9a9c66adda4df2106dfd0e5c053ab182a12bd  /lib/compiler/src/beam_block.erl
The R13B03 release. (tag: OTP_R13B03)
Diffstat (limited to 'lib/compiler/src/beam_block.erl')
-rw-r--r--   lib/compiler/src/beam_block.erl   624
1 file changed, 624 insertions(+), 0 deletions(-)
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
new file mode 100644
index 0000000000..d4a4ddca8a
--- /dev/null
+++ b/lib/compiler/src/beam_block.erl
@@ -0,0 +1,624 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1999-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose : Partitions assembly instructions into basic blocks and
+%%           optimizes them.
+
+-module(beam_block).
+
+-export([module/2]).
+-import(lists, [mapfoldl/3,reverse/1,reverse/2,foldl/3,member/2]).
+-define(MAXREG, 1024).
+
+module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) ->
+    {Fs,Lc} = mapfoldl(fun function/2, Lc0, Fs0),
+    {ok,{Mod,Exp,Attr,Fs,Lc}}.
+
+function({function,Name,Arity,CLabel,Is0}, Lc0) ->
+    try
+        %% Extra labels may thwart optimizations.
+        Is1 = beam_jump:remove_unused_labels(Is0),
+
+        %% Collect basic blocks and optimize them.
+        Is2 = blockify(Is1),
+        Is3 = beam_utils:live_opt(Is2),
+        Is4 = opt_blocks(Is3),
+        Is5 = beam_utils:delete_live_annos(Is4),
+
+        %% Optimize bit syntax.
+        {Is,Lc} = bsm_opt(Is5, Lc0),
+
+        %% Done.
+        {{function,Name,Arity,CLabel,Is},Lc}
+    catch
+        Class:Error ->
+            Stack = erlang:get_stacktrace(),
+            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+            erlang:raise(Class, Error, Stack)
+    end.
+
+%% blockify(Instructions0) -> Instructions
+%%  Collect sequences of instructions to basic blocks.
+%%  Also do some simple optimizations on instructions outside the blocks.
+
+blockify(Is) ->
+    blockify(Is, []).
+
+blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) ->
+    %% Useless instruction sequence.
+    blockify(Is, Acc);
+
+%% New bit syntax matching.
+blockify([{bs_save2,R,Point}=I,{bs_restore2,R,Point}|Is], Acc) ->
+    blockify([I|Is], Acc);
+blockify([{bs_save2,R,Point}=I,{test,is_eq_exact,_,_}=Test,
+          {bs_restore2,R,Point}|Is], Acc) ->
+    blockify([I,Test|Is], Acc);
+
+%% Do other peep-hole optimizations.
+blockify([{test,is_atom,{f,Fail},[Reg]}=I|
+          [{select_val,Reg,{f,Fail},
+            {list,[{atom,false},{f,_}=BrFalse,
+                   {atom,true}=AtomTrue,{f,_}=BrTrue]}}|Is]=Is0],
+         [{block,Bl}|_]=Acc) ->
+    case is_last_bool(Bl, Reg) of
+        false ->
+            blockify(Is0, [I|Acc]);
+        true ->
+            %% The last instruction is a boolean operator/guard BIF that can't fail.
+            %% We can convert the three-way branch to a two-way branch (eliminating
+            %% the reference to the failure label).
+            blockify(Is, [{jump,BrTrue},
+                          {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc])
+    end;
+blockify([{test,is_atom,{f,Fail},[Reg]}=I|
+          [{select_val,Reg,{f,Fail},
+            {list,[{atom,true}=AtomTrue,{f,_}=BrTrue,
+                   {atom,false},{f,_}=BrFalse]}}|Is]=Is0],
+         [{block,Bl}|_]=Acc) ->
+    case is_last_bool(Bl, Reg) of
+        false ->
+            blockify(Is0, [I|Acc]);
+        true ->
+            blockify(Is, [{jump,BrTrue},
+                          {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc])
+    end;
+blockify([I|Is0]=IsAll, Acc) ->
+    case is_bs_put(I) of
+        true ->
+            {BsPuts0,Is} = collect_bs_puts(IsAll),
+            BsPuts = opt_bs_puts(BsPuts0),
+            blockify(Is, reverse(BsPuts, Acc));
+        false ->
+            case collect(I) of
+                error -> blockify(Is0, [I|Acc]);
+                Instr when is_tuple(Instr) ->
+                    {Block,Is} = collect_block(IsAll),
+                    blockify(Is, [{block,Block}|Acc])
+            end
+    end;
+blockify([], Acc) -> reverse(Acc).
+
+is_last_bool([{set,[Reg],As,{bif,N,_}}], Reg) ->
+    Ar = length(As),
+    erl_internal:new_type_test(N, Ar) orelse erl_internal:comp_op(N, Ar)
+        orelse erl_internal:bool_op(N, Ar);
+is_last_bool([_|Is], Reg) -> is_last_bool(Is, Reg);
+is_last_bool([], _) -> false.
+
+collect_block(Is) ->
+    collect_block(Is, []).
+
+collect_block([{allocate_zero,Ns,R},{test_heap,Nh,R}|Is], Acc) ->
+    collect_block(Is, [{set,[],[],{alloc,R,{no_opt,Ns,Nh,[]}}}|Acc]);
+collect_block([I|Is]=Is0, Acc) ->
+    case collect(I) of
+        error -> {reverse(Acc),Is0};
+        Instr -> collect_block(Is, [Instr|Acc])
+    end.
+
+collect({allocate_zero,N,R}) -> {set,[],[],{alloc,R,{zero,N,0,[]}}};
+collect({test_heap,N,R}) -> {set,[],[],{alloc,R,{nozero,nostack,N,[]}}};
+collect({bif,N,F,As,D}) -> {set,[D],As,{bif,N,F}};
+collect({gc_bif,N,F,R,As,D}) -> {set,[D],As,{alloc,R,{gc_bif,N,F}}};
+collect({move,S,D}) -> {set,[D],[S],move};
+collect({put_list,S1,S2,D}) -> {set,[D],[S1,S2],put_list};
+collect({put_tuple,A,D}) -> {set,[D],[],{put_tuple,A}};
+collect({put,S}) -> {set,[],[S],put};
+collect({put_string,L,S,D}) -> {set,[D],[],{put_string,L,S}};
+collect({get_tuple_element,S,I,D}) -> {set,[D],[S],{get_tuple_element,I}};
+collect({set_tuple_element,S,D,I}) -> {set,[],[S,D],{set_tuple_element,I}};
+collect({get_list,S,D1,D2}) -> {set,[D1,D2],[S],get_list};
+collect(remove_message) -> {set,[],[],remove_message};
+collect({'catch',R,L}) -> {set,[R],[],{'catch',L}};
+collect(_) -> error.
+
+opt_blocks([{block,Bl0}|Is]) ->
+    %% The live annotation at the beginning is not useful.
+    [{'%live',_}|Bl] = Bl0,
+    [{block,opt_block(Bl)}|opt_blocks(Is)];
+opt_blocks([I|Is]) ->
+    [I|opt_blocks(Is)];
+opt_blocks([]) -> [].
+
+opt_block(Is0) ->
+    %% We explicitly move any allocate instruction upwards before optimising
+    %% moves, to avoid any potential problems with the calculation of live
+    %% registers.
+    Is1 = move_allocates(Is0),
+    Is = find_fixpoint(fun opt/1, Is1),
+    opt_alloc(Is).
+
+find_fixpoint(OptFun, Is0) ->
+    case OptFun(Is0) of
+        Is0 -> Is0;
+        Is1 -> find_fixpoint(OptFun, Is1)
+    end.
+
+%% move_allocates(Is0) -> Is
+%%  Move allocates upwards in the instruction stream, in the hope of
+%%  getting more possibilities for optimizing away moves later.
+
+move_allocates(Is) ->
+    move_allocates_1(reverse(Is), []).
+
+move_allocates_1([{set,[],[],{alloc,_,_}=Alloc}|Is0], Acc0) ->
+    {Is,Acc} = move_allocates_2(Alloc, Is0, Acc0),
+    move_allocates_1(Is, Acc);
+move_allocates_1([I|Is], Acc) ->
+    move_allocates_1(Is, [I|Acc]);
+move_allocates_1([], Is) -> Is.
+
+move_allocates_2({alloc,Live,Info}, [{set,[],[],{alloc,Live0,Info0}}|Is], Acc) ->
+    Live = Live0,                               % Assertion.
+    Alloc = {alloc,Live,combine_alloc(Info0, Info)},
+    move_allocates_2(Alloc, Is, Acc);
+move_allocates_2({alloc,Live,Info}=Alloc0, [I|Is]=Is0, Acc) ->
+    case alloc_may_pass(I) of
+        false ->
+            {Is0,[{set,[],[],Alloc0}|Acc]};
+        true ->
+            Alloc = {alloc,alloc_live_regs(I, Live),Info},
+            move_allocates_2(Alloc, Is, [I|Acc])
+    end;
+move_allocates_2(Alloc, [], Acc) ->
+    {[],[{set,[],[],Alloc}|Acc]}.
+
+alloc_may_pass({set,_,_,{alloc,_,_}}) -> false;
+alloc_may_pass({set,_,_,{set_tuple_element,_}}) -> false;
+alloc_may_pass({set,_,_,put_list}) -> false;
+alloc_may_pass({set,_,_,{put_tuple,_}}) -> false;
+alloc_may_pass({set,_,_,put}) -> false;
+alloc_may_pass({set,_,_,{put_string,_,_}}) -> false;
+alloc_may_pass({set,_,_,_}) -> true.
+
+combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]}) ->
+    {zero,Ns,beam_utils:combine_heap_needs(Nh1, Nh2),Init}.
+
+%% opt([Instruction]) -> [Instruction]
+%%  Optimize the instruction stream inside a basic block.
+
+opt([{set,[Dst],As,{bif,Bif,Fail}}=I1,
+     {set,[Dst],[Dst],{bif,'not',Fail}}=I2|Is]) ->
+    %% Get rid of the 'not' if the operation can be inverted.
+    case inverse_comp_op(Bif) of
+        none -> [I1,I2|opt(Is)];
+        RevBif -> [{set,[Dst],As,{bif,RevBif,Fail}}|opt(Is)]
+    end;
+opt([{set,[X],[X],move}|Is]) -> opt(Is);
+opt([{set,[D1],[{integer,Idx1},Reg],{bif,element,{f,0}}}=I1,
+     {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,0}}}=I2|Is])
+  when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg ->
+    opt([I2,I1|Is]);
+opt([{set,Ds0,Ss,Op}|Is0]) ->
+    {Ds,Is} = opt_moves(Ds0, Is0),
+    [{set,Ds,Ss,Op}|opt(Is)];
+opt([{'%live',_}=I|Is]) ->
+    [I|opt(Is)];
+opt([]) -> [].
+
+%% opt_moves([Dest], [Instruction]) -> {[Dest],[Instruction]}
+%%  For each Dest, does the optimization described in opt_move/2.
+
+opt_moves([], Is0) -> {[],Is0};
+opt_moves([D0]=Ds, Is0) ->
+    case opt_move(D0, Is0) of
+        not_possible -> {Ds,Is0};
+        {D1,Is} -> {[D1],Is}
+    end;
+opt_moves([X0,Y0], Is0) ->
+    {X,Is2} = case opt_move(X0, Is0) of
+                  not_possible -> {X0,Is0};
+                  {Y0,_} -> {X0,Is0};
+                  {_X1,_Is1} = XIs1 -> XIs1
+              end,
+    case opt_move(Y0, Is2) of
+        not_possible -> {[X,Y0],Is2};
+        {X,_} -> {[X,Y0],Is2};
+        {Y,Is} -> {[X,Y],Is}
+    end.
+
+%% opt_move(Dest, [Instruction]) -> {UpdatedDest,[Instruction]} | not_possible
+%%  If there is a {move,Dest,FinalDest} instruction
+%%  in the instruction stream, remove the move instruction
+%%  and let FinalDest be the destination.
+%%
+%%  For this optimization to be safe, we must be sure that
+%%  Dest will not be referenced in any other way by other instructions
+%%  in the rest of the instruction stream. Not even the indirect
+%%  reference by an instruction that may allocate (such as
+%%  test_heap/2 or a GC BIF) is allowed.
+
+opt_move(Dest, Is) ->
+    opt_move_1(Dest, Is, ?MAXREG, []).
+
+opt_move_1(R, [{set,_,_,{alloc,Live,_}}|_]=Is, SafeRegs, Acc) when Live < SafeRegs ->
+    %% Downgrade the number of safe regs and rescan the instruction;
+    %% it is most probably a gc_bif instruction.
+    opt_move_1(R, Is, Live, Acc);
+opt_move_1(R, [{set,[{x,X}=D],[R],move}|Is], SafeRegs, Acc) ->
+    case X < SafeRegs andalso beam_utils:is_killed_block(R, Is) of
+        true -> opt_move_2(D, Acc, Is);
+        false -> not_possible
+    end;
+opt_move_1(R, [{set,[D],[R],move}|Is], _SafeRegs, Acc) ->
+    case beam_utils:is_killed_block(R, Is) of
+        true -> opt_move_2(D, Acc, Is);
+        false -> not_possible
+    end;
+opt_move_1(R, [I|Is], SafeRegs, Acc) ->
+    case is_transparent(R, I) of
+        false -> not_possible;
+        true -> opt_move_1(R, Is, SafeRegs, [I|Acc])
+    end.
+
+%% Reverse the instructions, while checking that there are no instructions that
+%% would interfere with using the new destination register chosen.
+
+opt_move_2(D, [I|Is], Acc) ->
+    case is_transparent(D, I) of
+        false -> not_possible;
+        true -> opt_move_2(D, Is, [I|Acc])
+    end;
+opt_move_2(D, [], Acc) -> {D,Acc}.
+
+%% is_transparent(Register, Instruction) -> true | false
+%%  Returns true if Instruction does not reference Register in any way
+%%  (not even indirectly via an allocation instruction).
+%%  Returns false if Instruction does reference Register, or if we are
+%%  not sure.
+
+is_transparent({x,X}, {set,_,_,{alloc,Live,_}}) when X < Live ->
+    false;
+is_transparent(R, {set,Ds,Ss,_Op}) ->
+    case member(R, Ds) of
+        true -> false;
+        false -> not member(R, Ss)
+    end;
+is_transparent(_, _) -> false.
+
+%% opt_alloc(Instructions) -> Instructions'
+%%  Optimises all allocate instructions.
+
+opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) ->
+    [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|opt(Is)];
+opt_alloc([I|Is]) -> [I|opt_alloc(Is)];
+opt_alloc([]) -> [].
+
+%% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> [Instr]
+%%  Generates the optimal sequence of instructions for
+%%  allocating and initializing the stack frame and needed heap.
+
+opt_alloc(_Is, nostack, Nh, LivingRegs) ->
+    {alloc,LivingRegs,{nozero,nostack,Nh,[]}};
+opt_alloc(Is, Ns, Nh, LivingRegs) ->
+    InitRegs = init_yreg(Is, 0),
+    case count_ones(InitRegs) of
+        N when N*2 > Ns ->
+            {alloc,LivingRegs,{nozero,Ns,Nh,gen_init(Ns, InitRegs)}};
+        _ ->
+            {alloc,LivingRegs,{zero,Ns,Nh,[]}}
+    end.
+
+gen_init(Fs, Regs) -> gen_init(Fs, Regs, 0, []).
+
+gen_init(SameFs, _Regs, SameFs, Acc) -> reverse(Acc);
+gen_init(Fs, Regs, Y, Acc) when Regs band 1 =:= 0 ->
+    gen_init(Fs, Regs bsr 1, Y+1, [{init,{y,Y}}|Acc]);
+gen_init(Fs, Regs, Y, Acc) ->
+    gen_init(Fs, Regs bsr 1, Y+1, Acc).
+
+%% init_yreg(Instructions, RegSet) -> RegSetInitialized
+%%  Calculate the set of initialized y registers.
+
+init_yreg([{set,_,_,{bif,_,_}}|_], Reg) -> Reg;
+init_yreg([{set,_,_,{alloc,_,{gc_bif,_,_}}}|_], Reg) -> Reg;
+init_yreg([{set,Ds,_,_}|Is], Reg) -> init_yreg(Is, add_yregs(Ds, Reg));
+init_yreg(_Is, Reg) -> Reg.
+
+add_yregs(Ys, Reg) -> foldl(fun(Y, R0) -> add_yreg(Y, R0) end, Reg, Ys).
+
+add_yreg({y,Y}, Reg) -> Reg bor (1 bsl Y);
+add_yreg(_, Reg) -> Reg.
+
+count_ones(Bits) -> count_ones(Bits, 0).
+count_ones(0, Acc) -> Acc;
+count_ones(Bits, Acc) ->
+    count_ones(Bits bsr 1, Acc + (Bits band 1)).
+
+%% Calculate the new number of live registers when we move an allocate
+%% instruction upwards, passing a 'set' instruction.
+
+alloc_live_regs({set,Ds,Ss,_}, Regs0) ->
+    Rset = x_live(Ss, x_dead(Ds, (1 bsl Regs0)-1)),
+    live_regs(Rset).
+
+live_regs(Regs) ->
+    live_regs_1(0, Regs).
+
+live_regs_1(N, 0) -> N;
+live_regs_1(N, Regs) -> live_regs_1(N+1, Regs bsr 1).
+
+x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N)));
+x_dead([_|Rs], Regs) -> x_dead(Rs, Regs);
+x_dead([], Regs) -> Regs.
+
+x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N));
+x_live([_|Rs], Regs) -> x_live(Rs, Regs);
+x_live([], Regs) -> Regs.
+
+%% inverse_comp_op(Op) -> none|RevOp
+
+inverse_comp_op('=:=') -> '=/=';
+inverse_comp_op('=/=') -> '=:=';
+inverse_comp_op('==') -> '/=';
+inverse_comp_op('/=') -> '==';
+inverse_comp_op('>') -> '=<';
+inverse_comp_op('<') -> '>=';
+inverse_comp_op('>=') -> '<';
+inverse_comp_op('=<') -> '>';
+inverse_comp_op(_) -> none.
+
+%%%
+%%% Evaluation of constant bit fields.
+%%%
+
+is_bs_put({bs_put_integer,_,_,_,_,_}) -> true;
+is_bs_put({bs_put_float,_,_,_,_,_}) -> true;
+is_bs_put(_) -> false.
+
+collect_bs_puts(Is) ->
+    collect_bs_puts_1(Is, []).
+
+collect_bs_puts_1([I|Is]=Is0, Acc) ->
+    case is_bs_put(I) of
+        false -> {reverse(Acc),Is0};
+        true -> collect_bs_puts_1(Is, [I|Acc])
+    end.
+
+opt_bs_puts(Is) ->
+    opt_bs_1(Is, []).
+
+opt_bs_1([{bs_put_float,Fail,{integer,Sz},1,Flags0,Src}=I0|Is], Acc) ->
+    try eval_put_float(Src, Sz, Flags0) of
+        <<Int:Sz>> ->
+            Flags = force_big(Flags0),
+            I = {bs_put_integer,Fail,{integer,Sz},1,Flags,{integer,Int}},
+            opt_bs_1([I|Is], Acc)
+    catch
+        error:_ ->
+            opt_bs_1(Is, [I0|Acc])
+    end;
+opt_bs_1([{bs_put_integer,_,{integer,8},1,_,{integer,_}}|_]=IsAll, Acc0) ->
+    {Is,Acc} = bs_collect_string(IsAll, Acc0),
+    opt_bs_1(Is, Acc);
+opt_bs_1([{bs_put_integer,Fail,{integer,Sz},1,F,{integer,N}}=I|Is0], Acc) when Sz > 8 ->
+    case field_endian(F) of
+        big ->
+            %% We can do this optimization for any field size without risk
+            %% of code explosion.
+            case bs_split_int(N, Sz, Fail, Is0) of
+                no_split -> opt_bs_1(Is0, [I|Acc]);
+                Is -> opt_bs_1(Is, Acc)
+            end;
+        little when Sz < 128 ->
+            %% We only try to optimize relatively small fields, to avoid
+            %% an explosion in code size.
+            <<Int:Sz>> = <<N:Sz/little>>,
+            Flags = force_big(F),
+            Is = [{bs_put_integer,Fail,{integer,Sz},1,
+                   Flags,{integer,Int}}|Is0],
+            opt_bs_1(Is, Acc);
+        _ -> %native or too wide little field
+            opt_bs_1(Is0, [I|Acc])
+    end;
+opt_bs_1([{Op,Fail,{integer,Sz},U,F,Src}|Is], Acc) when U > 1 ->
+    opt_bs_1([{Op,Fail,{integer,U*Sz},1,F,Src}|Is], Acc);
+opt_bs_1([I|Is], Acc) ->
+    opt_bs_1(Is, [I|Acc]);
+opt_bs_1([], Acc) -> reverse(Acc).
+
+eval_put_float(Src, Sz, Flags) when Sz =< 256 -> %Only evaluate if Sz is reasonable.
+    Val = value(Src),
+    case field_endian(Flags) of
+        little -> <<Val:Sz/little-float-unit:1>>;
+        big -> <<Val:Sz/big-float-unit:1>>
+        %% native intentionally not handled here - we can't optimize it.
+    end.
+
+value({integer,I}) -> I;
+value({float,F}) -> F.
+
+bs_collect_string(Is, [{bs_put_string,Len,{string,Str}}|Acc]) ->
+    bs_coll_str_1(Is, Len, reverse(Str), Acc);
+bs_collect_string(Is, Acc) ->
+    bs_coll_str_1(Is, 0, [], Acc).
+
+bs_coll_str_1([{bs_put_integer,_,{integer,Sz},U,_,{integer,V}}|Is],
+              Len, StrAcc, IsAcc) when U*Sz =:= 8 ->
+    Byte = V band 16#FF,
+    bs_coll_str_1(Is, Len+1, [Byte|StrAcc], IsAcc);
+bs_coll_str_1(Is, Len, StrAcc, IsAcc) ->
+    {Is,[{bs_put_string,Len,{string,reverse(StrAcc)}}|IsAcc]}.
+
+field_endian({field_flags,F}) -> field_endian_1(F).
+
+field_endian_1([big=E|_]) -> E;
+field_endian_1([little=E|_]) -> E;
+field_endian_1([native=E|_]) -> E;
+field_endian_1([_|Fs]) -> field_endian_1(Fs).
+
+force_big({field_flags,F}) ->
+    {field_flags,force_big_1(F)}.
+
+force_big_1([big|_]=Fs) -> Fs;
+force_big_1([little|Fs]) -> [big|Fs];
+force_big_1([F|Fs]) -> [F|force_big_1(Fs)].
+
+bs_split_int(0, Sz, _, _) when Sz > 64 ->
+    %% We don't want to split in this case because the
+    %% string will consist of only zeroes.
+    no_split;
+bs_split_int(-1, Sz, _, _) when Sz > 64 ->
+    %% We don't want to split in this case because the
+    %% string will consist of only 255 bytes.
+    no_split;
+bs_split_int(N, Sz, Fail, Acc) ->
+    FirstByteSz = case Sz rem 8 of
+                      0 -> 8;
+                      Rem -> Rem
+                  end,
+    bs_split_int_1(N, FirstByteSz, Sz, Fail, Acc).
+
+bs_split_int_1(-1, _, Sz, Fail, Acc) when Sz > 64 ->
+    I = {bs_put_integer,Fail,{integer,Sz},1,{field_flags,[big]},{integer,-1}},
+    [I|Acc];
+bs_split_int_1(0, _, Sz, Fail, Acc) when Sz > 64 ->
+    I = {bs_put_integer,Fail,{integer,Sz},1,{field_flags,[big]},{integer,0}},
+    [I|Acc];
+bs_split_int_1(N, ByteSz, Sz, Fail, Acc) when Sz > 0 ->
+    Mask = (1 bsl ByteSz) - 1,
+    I = {bs_put_integer,Fail,{integer,ByteSz},1,
+         {field_flags,[big]},{integer,N band Mask}},
+    bs_split_int_1(N bsr ByteSz, 8, Sz-ByteSz, Fail, [I|Acc]);
+bs_split_int_1(_, _, _, _, Acc) -> Acc.
+
+
+%%%
+%%% Optimization of new bit syntax matching: get rid
+%%% of redundant bs_restore2/2 instructions across select_val
+%%% instructions, as well as a few other simple peep-hole optimizations.
+%%%
+
+bsm_opt(Is0, Lc0) ->
+    {Is1,D0,Lc} = bsm_scan(Is0, [], Lc0, []),
+    Is2 = case D0 of
+              [] ->
+                  Is1;
+              _ ->
+                  D = gb_trees:from_orddict(orddict:from_list(D0)),
+                  bsm_reroute(Is1, D, none, [])
+          end,
+    Is = beam_clean:bs_clean_saves(Is2),
+    {bsm_opt_2(Is, []),Lc}.
+
+bsm_scan([{label,L}=Lbl,{bs_restore2,_,Save}=R|Is], D0, Lc, Acc0) ->
+    D = [{{L,Save},Lc}|D0],
+    Acc = [{label,Lc},R,Lbl|Acc0],
+    bsm_scan(Is, D, Lc+1, Acc);
+bsm_scan([I|Is], D, Lc, Acc) ->
+    bsm_scan(Is, D, Lc, [I|Acc]);
+bsm_scan([], D, Lc, Acc) ->
+    {reverse(Acc),D,Lc}.
+
+bsm_reroute([{bs_save2,Reg,Save}=I|Is], D, _, Acc) ->
+    bsm_reroute(Is, D, {Reg,Save}, [I|Acc]);
+bsm_reroute([{bs_restore2,Reg,Save}=I|Is], D, _, Acc) ->
+    bsm_reroute(Is, D, {Reg,Save}, [I|Acc]);
+bsm_reroute([{label,_}=I|Is], D, S, Acc) ->
+    bsm_reroute(Is, D, S, [I|Acc]);
+bsm_reroute([{select_val,Reg,F0,{list,Lbls0}}|Is], D, {_,Save}=S, Acc0) ->
+    [F|Lbls] = bsm_subst_labels([F0|Lbls0], Save, D),
+    Acc = [{select_val,Reg,F,{list,Lbls}}|Acc0],
+    bsm_reroute(Is, D, S, Acc);
+bsm_reroute([{test,TestOp,F0,TestArgs}=I|Is], D, {_,Save}=S, Acc0) ->
+    F = bsm_subst_label(F0, Save, D),
+    Acc = [{test,TestOp,F,TestArgs}|Acc0],
+    case bsm_not_bs_test(I) of
+        true ->
+            %% The test instruction will not update the bit offset for the
+            %% binary being matched. Therefore the save position can be kept.
+            bsm_reroute(Is, D, S, Acc);
+        false ->
+            %% The test instruction might update the bit offset. Kill our
+            %% remembered Save position.
+            bsm_reroute(Is, D, none, Acc)
+    end;
+bsm_reroute([{test,TestOp,F0,Live,TestArgs,Dst}|Is], D, {_,Save}, Acc0) ->
+    F = bsm_subst_label(F0, Save, D),
+    Acc = [{test,TestOp,F,Live,TestArgs,Dst}|Acc0],
+    %% The test instruction will update the bit offset. Kill our
+    %% remembered Save position.
+    bsm_reroute(Is, D, none, Acc);
+bsm_reroute([{block,[{set,[],[],{alloc,_,_}}]}=Bl,
+             {bs_context_to_binary,_}=I|Is], D, S, Acc) ->
+    %% To help further bit syntax optimizations.
+    bsm_reroute([I,Bl|Is], D, S, Acc);
+bsm_reroute([I|Is], D, _, Acc) ->
+    bsm_reroute(Is, D, none, [I|Acc]);
+bsm_reroute([], _, _, Acc) -> reverse(Acc).
+
+bsm_opt_2([{test,bs_test_tail2,F,[Ctx,Bits]}|Is],
+          [{test,bs_skip_bits2,F,[Ctx,{integer,I},Unit,_Flags]}|Acc]) ->
+    bsm_opt_2(Is, [{test,bs_test_tail2,F,[Ctx,Bits+I*Unit]}|Acc]);
+bsm_opt_2([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,_]}|Is],
+          [{test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,Flags]}|Acc]) ->
+    bsm_opt_2(Is, [{test,bs_skip_bits2,F,
+                    [Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]);
+bsm_opt_2([{test,bs_match_string,F,[Ctx,Bin1]},
+           {test,bs_match_string,F,[Ctx,Bin2]}|Is], Acc) ->
+    I = {test,bs_match_string,F,[Ctx,<<Bin1/bitstring,Bin2/bitstring>>]},
+    bsm_opt_2([I|Is], Acc);
+bsm_opt_2([I|Is], Acc) ->
+    bsm_opt_2(Is, [I|Acc]);
+bsm_opt_2([], Acc) -> reverse(Acc).
+
+%% bsm_not_bs_test({test,Name,_,Operands}) -> true|false.
+%%  Test whether the test is "safe", i.e. does not move the
+%%  bit offset for a binary.
+%%
+%%  'true' means that the test is safe, 'false' that we don't know or
+%%  that the test moves the offset (e.g. bs_get_integer2).
+
+bsm_not_bs_test({test,bs_test_tail2,_,[_,_]}) -> true;
+bsm_not_bs_test(Test) -> beam_utils:is_pure_test(Test).
+
+bsm_subst_labels(Fs, Save, D) ->
+    bsm_subst_labels_1(Fs, Save, D, []).
+
+bsm_subst_labels_1([F|Fs], Save, D, Acc) ->
+    bsm_subst_labels_1(Fs, Save, D, [bsm_subst_label(F, Save, D)|Acc]);
+bsm_subst_labels_1([], _, _, Acc) ->
+    reverse(Acc).
+
+bsm_subst_label({f,Lbl0}=F, Save, D) ->
+    case gb_trees:lookup({Lbl0,Save}, D) of
+        {value,Lbl} -> {f,Lbl};
+        none -> F
+    end;
+bsm_subst_label(Other, _, _) -> Other.
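
Editor's illustration: the pass rewrites every collectible instruction into a uniform {set,Destinations,Sources,Operation} tuple before grouping a run of them into a {block,...} pseudo-instruction. A minimal standalone sketch of that mapping (the module name block_demo and the demo/0 driver are invented for illustration; the collect/1 clauses mirror two of the clauses in the diff above):

-module(block_demo).
-export([demo/0]).

%% Mirror of two collect/1 clauses from beam_block.erl: each collectible
%% instruction becomes {set,Destinations,Sources,Operation}.
collect({move,S,D}) -> {set,[D],[S],move};
collect({put_list,S1,S2,D}) -> {set,[D],[S1,S2],put_list};
collect(_) -> error.

demo() ->
    %% {move,{x,1},{x,0}} copies x1 into x0; inside a block it is
    %% represented as {set,[{x,0}],[{x,1}],move}.
    [collect({move,{x,1},{x,0}}),
     collect({put_list,{x,0},nil,{x,0}})].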
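The first opt/1 clause pairs a comparison BIF with an immediately following 'not' on the same register and replaces both with the inverse comparison via inverse_comp_op/1. A simplified, runnable sketch of just that peephole (invert_demo and demo/0 are illustrative names; the clause is reduced to a single rewrite without the recursion of the real opt/1):

-module(invert_demo).
-export([demo/0]).

%% Subset of inverse_comp_op/1 from the diff above.
inverse_comp_op('=:=') -> '=/=';
inverse_comp_op('==') -> '/=';
inverse_comp_op('<') -> '>=';
inverse_comp_op(_) -> none.

%% A comparison BIF whose result is immediately negated into the same
%% register is replaced by the inverse comparison; the 'not' disappears.
opt([{set,[Dst],As,{bif,Bif,Fail}}=I1,
     {set,[Dst],[Dst],{bif,'not',Fail}}=I2|Is]) ->
    case inverse_comp_op(Bif) of
        none -> [I1,I2|Is];
        RevBif -> [{set,[Dst],As,{bif,RevBif,Fail}}|Is]
    end;
opt(Is) -> Is.

demo() ->
    Before = [{set,[{x,0}],[{x,1},{x,2}],{bif,'=:=',{f,0}}},
              {set,[{x,0}],[{x,0}],{bif,'not',{f,0}}}],
    %% The =:= followed by 'not' collapses into a single =/=.
    [{set,[{x,0}],[{x,1},{x,2}],{bif,'=/=',{f,0}}}] = opt(Before),
    ok.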
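opt_alloc/4 decides between a zeroing allocate and an allocate with explicit {init,{y,Y}} instructions by tracking, in a bitmask, which y registers the block itself initializes: if more than half the frame slots are written anyway (N*2 > Ns), zeroing the whole frame is wasted work. A worked sketch of that bookkeeping (yreg_demo and demo/0 are invented; count_ones and gen_init are copied from the diff above, with lists:reverse/1 in place of the import):

-module(yreg_demo).
-export([demo/0]).

count_ones(Bits) -> count_ones(Bits, 0).
count_ones(0, Acc) -> Acc;
count_ones(Bits, Acc) -> count_ones(Bits bsr 1, Acc + (Bits band 1)).

%% Emit {init,{y,Y}} for every frame slot whose bit is 0 (uninitialized).
gen_init(Fs, Regs) -> gen_init(Fs, Regs, 0, []).
gen_init(SameFs, _Regs, SameFs, Acc) -> lists:reverse(Acc);
gen_init(Fs, Regs, Y, Acc) when Regs band 1 =:= 0 ->
    gen_init(Fs, Regs bsr 1, Y+1, [{init,{y,Y}}|Acc]);
gen_init(Fs, Regs, Y, Acc) ->
    gen_init(Fs, Regs bsr 1, Y+1, Acc).

demo() ->
    %% A 4-slot frame where the block initializes y1 and y3: mask 2#1010.
    %% count_ones gives 2; since 2*2 > 4 is false, opt_alloc would keep
    %% the zeroing allocate. gen_init shows the explicit inits that the
    %% nozero variant would otherwise need, here for y0 and y2.
    InitRegs = 2#1010,
    {count_ones(InitRegs), gen_init(4, InitRegs)}.
    %% -> {2,[{init,{y,0}},{init,{y,2}}]}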
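bs_split_int/4 turns one wide constant integer field into byte-sized big-endian puts: it peels off the least significant chunk first, and because each instruction is prepended to the accumulator, the emitted stream ends up most significant chunk first, with the odd-sized remainder (Sz rem 8 bits) last. A simplified sketch of the chunking arithmetic (split_demo, split/2 and the {put,Size,Value} tuples are illustrative stand-ins for the real bs_put_integer instructions):

-module(split_demo).
-export([demo/0]).

split(N, Sz) ->
    %% Same remainder rule as bs_split_int/4: the trailing chunk gets
    %% Sz rem 8 bits, or a full byte when Sz is a multiple of 8.
    FirstByteSz = case Sz rem 8 of
                      0 -> 8;
                      Rem -> Rem
                  end,
    split_1(N, FirstByteSz, Sz, []).

split_1(N, ByteSz, Sz, Acc) when Sz > 0 ->
    Mask = (1 bsl ByteSz) - 1,
    %% Peel off the low ByteSz bits; prepending reverses the order, so
    %% high-order chunks end up first in the result.
    split_1(N bsr ByteSz, 8, Sz - ByteSz, [{put,ByteSz,N band Mask}|Acc]);
split_1(_, _, _, Acc) -> Acc.

demo() ->
    %% A 12-bit big-endian field 16#ABC is emitted as one full byte
    %% (16#AB) followed by a trailing 4-bit chunk (16#C).
    [{put,8,16#AB},{put,4,16#C}] = split(16#ABC, 12),
    <<16#ABC:12>> = <<16#AB:8,16#C:4>>,  %The chunks reassemble the field.
    ok.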