Diffstat:
 lib/dialyzer/test/options1_SUITE_data/src/compiler/beam_block.erl | 601 ++++++++++
 1 file changed, 601 insertions(+), 0 deletions(-)
diff --git a/lib/dialyzer/test/options1_SUITE_data/src/compiler/beam_block.erl b/lib/dialyzer/test/options1_SUITE_data/src/compiler/beam_block.erl
new file mode 100644
index 0000000000..0e3589cdf5
--- /dev/null
+++ b/lib/dialyzer/test/options1_SUITE_data/src/compiler/beam_block.erl
@@ -0,0 +1,601 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id: beam_block.erl,v 1.1 2008/12/17 09:53:41 mikpe Exp $
+%%
+%% Purpose : Partitions assembly instructions into basic blocks and
+%% optimizes them.
+
+-module(beam_block).
+
+-export([module/2]).
+-export([live_at_entry/1]). %Used by beam_type, beam_bool.
+-export([is_killed/2]). %Used by beam_dead, beam_type, beam_bool.
+-export([is_not_used/2]). %Used by beam_bool.
+-export([merge_blocks/2]). %Used by beam_jump.
+-import(lists, [map/2,mapfoldr/3,reverse/1,reverse/2,foldl/3,
+ member/2,sort/1,all/2]).
+-define(MAXREG, 1024).
+
+module({Mod,Exp,Attr,Fs,Lc}, _Opt) ->
+ {ok,{Mod,Exp,Attr,map(fun function/1, Fs),Lc}}.
+
+function({function,Name,Arity,CLabel,Is0}) ->
+ %% Collect basic blocks and optimize them.
+ Is = blockify(Is0),
+
+ %% Done.
+ {function,Name,Arity,CLabel,Is}.
+
+%% blockify(Instructions0) -> Instructions
+%% Collect sequences of instructions into basic blocks and
+%% optimize the contents of the blocks. Also do some simple
+%% optimizations on instructions outside the blocks.
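+%%
+%% An illustration (operands invented; tuple shapes follow collect/1
+%% below): a straight-line prefix such as
+%%     [{move,{x,0},{y,0}},{put_list,{x,1},nil,{x,2}},{call,1,{f,10}}]
+%% is collected into
+%%     [{block,[{set,[{y,0}],[{x,0}],move},
+%%              {set,[{x,2}],[{x,1},nil],put_list}]},
+%%      {call,1,{f,10}}]
+%% because call is not collectable and therefore ends the block.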
+
+blockify(Is) ->
+ blockify(Is, []).
+
+blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) ->
+ %% Useless instruction sequence.
+ blockify(Is, Acc);
+blockify([{test,bs_test_tail,F,[Bits]}|Is],
+ [{test,bs_skip_bits,F,[{integer,I},Unit,_Flags]}|Acc]) ->
+ blockify(Is, [{test,bs_test_tail,F,[Bits+I*Unit]}|Acc]);
+blockify([{test,bs_skip_bits,F,[{integer,I1},Unit1,_]}|Is],
+ [{test,bs_skip_bits,F,[{integer,I2},Unit2,Flags]}|Acc]) ->
+ blockify(Is, [{test,bs_skip_bits,F,
+ [{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]);
+blockify([{test,is_atom,{f,Fail},[Reg]}=I|
+ [{select_val,Reg,{f,Fail},
+ {list,[{atom,false},{f,_}=BrFalse,
+ {atom,true}=AtomTrue,{f,_}=BrTrue]}}|Is]=Is0],
+ [{block,Bl}|_]=Acc) ->
+ case is_last_bool(Bl, Reg) of
+ false ->
+ blockify(Is0, [I|Acc]);
+ true ->
+ blockify(Is, [{jump,BrTrue},
+ {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc])
+ end;
+blockify([{test,is_atom,{f,Fail},[Reg]}=I|
+ [{select_val,Reg,{f,Fail},
+ {list,[{atom,true}=AtomTrue,{f,_}=BrTrue,
+ {atom,false},{f,_}=BrFalse]}}|Is]=Is0],
+ [{block,Bl}|_]=Acc) ->
+ case is_last_bool(Bl, Reg) of
+ false ->
+ blockify(Is0, [I|Acc]);
+ true ->
+ blockify(Is, [{jump,BrTrue},
+ {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc])
+ end;
+blockify([I|Is0]=IsAll, Acc) ->
+ case is_bs_put(I) of
+ true ->
+ {BsPuts0,Is} = collect_bs_puts(IsAll),
+ BsPuts = opt_bs_puts(BsPuts0),
+ blockify(Is, reverse(BsPuts, Acc));
+ false ->
+ case collect(I) of
+ error -> blockify(Is0, [I|Acc]);
+ Instr when is_tuple(Instr) ->
+ {Block0,Is} = collect_block(IsAll),
+ Block = opt_block(Block0),
+ blockify(Is, [{block,Block}|Acc])
+ end
+ end;
+blockify([], Acc) -> reverse(Acc).
+
+is_last_bool([I,{'%live',_}], Reg) ->
+ is_last_bool([I], Reg);
+is_last_bool([{set,[Reg],As,{bif,N,_}}], Reg) ->
+ Ar = length(As),
+ erl_internal:new_type_test(N, Ar) orelse erl_internal:comp_op(N, Ar)
+ orelse erl_internal:bool_op(N, Ar);
+is_last_bool([_|Is], Reg) -> is_last_bool(Is, Reg);
+is_last_bool([], _) -> false.
+
+collect_block(Is) ->
+ collect_block(Is, []).
+
+collect_block([{allocate_zero,Ns,R},{test_heap,Nh,R}|Is], Acc) ->
+ collect_block(Is, [{allocate,R,{no_opt,Ns,Nh,[]}}|Acc]);
+collect_block([I|Is]=Is0, Acc) ->
+ case collect(I) of
+ error -> {reverse(Acc),Is0};
+ Instr -> collect_block(Is, [Instr|Acc])
+ end;
+collect_block([], Acc) -> {reverse(Acc),[]}.
+
+collect({allocate_zero,N,R}) -> {allocate,R,{zero,N,0,[]}};
+collect({test_heap,N,R}) -> {allocate,R,{nozero,nostack,N,[]}};
+collect({bif,N,nofail,As,D}) -> {set,[D],As,{bif,N}};
+collect({bif,N,F,As,D}) -> {set,[D],As,{bif,N,F}};
+collect({move,S,D}) -> {set,[D],[S],move};
+collect({put_list,S1,S2,D}) -> {set,[D],[S1,S2],put_list};
+collect({put_tuple,A,D}) -> {set,[D],[],{put_tuple,A}};
+collect({put,S}) -> {set,[],[S],put};
+collect({put_string,L,S,D}) -> {set,[D],[],{put_string,L,S}};
+collect({get_tuple_element,S,I,D}) -> {set,[D],[S],{get_tuple_element,I}};
+collect({set_tuple_element,S,D,I}) -> {set,[],[S,D],{set_tuple_element,I}};
+collect({get_list,S,D1,D2}) -> {set,[D1,D2],[S],get_list};
+collect(remove_message) -> {set,[],[],remove_message};
+collect({'catch',R,L}) -> {set,[R],[],{'catch',L}};
+collect({'%live',_}=Live) -> Live;
+collect(_) -> error.
+
+opt_block(Is0) ->
+ %% We explicitly move any allocate instruction upwards before optimising
+ %% moves, to avoid any potential problems with the calculation of live
+ %% registers.
+ Is1 = find_fixpoint(fun move_allocates/1, Is0),
+ Is2 = find_fixpoint(fun opt/1, Is1),
+ Is = opt_alloc(Is2),
+ share_floats(Is).
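+
+%% A small sketch of the allocate-hoisting step (operands invented):
+%% move_allocates/1 rewrites
+%%     [{set,[{x,1}],[{x,0}],move},{allocate,2,{zero,1,0,[]}}]
+%% into
+%%     [{allocate,1,{zero,1,0,[]}},{set,[{x,1}],[{x,0}],move}]
+%% where the live count drops to 1 because {x,1} is dead before the
+%% move: {x,0} is the only register live at the allocate.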
+
+find_fixpoint(OptFun, Is0) ->
+ case OptFun(Is0) of
+ Is0 -> Is0;
+ Is1 -> find_fixpoint(OptFun, Is1)
+ end.
+
+move_allocates([{set,_Ds,_Ss,{set_tuple_element,_}}|_]=Is) -> Is;
+move_allocates([{set,Ds,Ss,_Op}=Set,{allocate,R,Alloc}|Is]) when is_integer(R) ->
+ [{allocate,live_regs(Ds, Ss, R),Alloc},Set|Is];
+move_allocates([{allocate,R1,Alloc1},{allocate,R2,Alloc2}|Is]) ->
+ R1 = R2, % Assertion.
+ move_allocates([{allocate,R1,combine_alloc(Alloc1, Alloc2)}|Is]);
+move_allocates([I|Is]) ->
+ [I|move_allocates(Is)];
+move_allocates([]) -> [].
+
+combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]}) ->
+ {zero,Ns,Nh1+Nh2,Init}.
+
+merge_blocks([{allocate,R,{Attr,Ns,Nh1,Init}}|B1],
+ [{allocate,_,{_,nostack,Nh2,[]}}|B2]) ->
+ Alloc = {allocate,R,{Attr,Ns,Nh1+Nh2,Init}},
+ [Alloc|merge_blocks(B1, B2)];
+merge_blocks(B1, B2) -> merge_blocks_1(B1++[{set,[],[],stop_here}|B2]).
+
+merge_blocks_1([{set,[],_,stop_here}|Is]) -> Is;
+merge_blocks_1([{set,[D],_,move}=I|Is]) ->
+ case is_killed(D, Is) of
+ true -> merge_blocks_1(Is);
+ false -> [I|merge_blocks_1(Is)]
+ end;
+merge_blocks_1([I|Is]) -> [I|merge_blocks_1(Is)].
+
+opt([{set,[Dst],As,{bif,Bif,Fail}}=I1,
+ {set,[Dst],[Dst],{bif,'not',Fail}}=I2|Is]) ->
+ %% Get rid of the 'not' if the operation can be inverted.
+ case inverse_comp_op(Bif) of
+ none -> [I1,I2|opt(Is)];
+ RevBif -> [{set,[Dst],As,{bif,RevBif,Fail}}|opt(Is)]
+ end;
+opt([{set,[X],[X],move}|Is]) -> opt(Is);
+opt([{set,[D1],[{integer,Idx1},Reg],{bif,element,{f,0}}}=I1,
+ {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,0}}}=I2|Is])
+ when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg ->
+ opt([I2,I1|Is]);
+opt([{set,Ds0,Ss,Op}|Is0]) ->
+ {Ds,Is} = opt_moves(Ds0, Is0),
+ [{set,Ds,Ss,Op}|opt(Is)];
+opt([I|Is]) -> [I|opt(Is)];
+opt([]) -> [].
+
+opt_moves([], Is0) -> {[],Is0};
+opt_moves([D0], Is0) ->
+ {D1,Is1} = opt_move(D0, Is0),
+ {[D1],Is1};
+opt_moves([X0,Y0]=Ds, Is0) ->
+ {X1,Is1} = opt_move(X0, Is0),
+ case opt_move(Y0, Is1) of
+ {Y1,Is2} when X1 =/= Y1 -> {[X1,Y1],Is2};
+ _Other when X1 =/= Y0 -> {[X1,Y0],Is1};
+ _Other -> {Ds,Is0}
+ end.
+
+opt_move(R, [{set,[D],[R],move}|Is]=Is0) ->
+ case is_killed(R, Is) of
+ true -> {D,Is};
+ false -> {R,Is0}
+ end;
+opt_move(R, [I|Is0]) ->
+ case is_transparent(R, I) of
+ true ->
+ {D,Is1} = opt_move(R, Is0),
+ case is_transparent(D, I) of
+ true -> {D,[I|Is1]};
+ false -> {R,[I|Is0]}
+ end;
+ false -> {R,[I|Is0]}
+ end;
+opt_move(R, []) -> {R,[]}.
+
+is_transparent(R, {set,Ds,Ss,_Op}) ->
+ case member(R, Ds) of
+ true -> false;
+ false -> not member(R, Ss)
+ end;
+is_transparent(_, _) -> false.
+
+%% is_killed(Register, [Instruction]) -> true|false
+%% Determine whether a register is killed by the instruction sequence.
+%% If true is returned, it means that the register will not be
+%% referenced in ANY way (not even indirectly by an allocate instruction);
+%% i.e. it is OK to enter the instruction sequence with Register
+%% containing garbage.
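+%%
+%% For instance, given [{set,[{x,1}],[{x,0}],move}|_], the register
+%% {x,1} is killed (it is overwritten before ever being read), while
+%% {x,0} is read by the move and therefore not killed.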
+
+is_killed({x,N}=R, [{block,Blk}|Is]) ->
+ case is_killed(R, Blk) of
+ true -> true;
+ false ->
+ %% Before looking beyond the block, we must be
+ %% sure that the register is not referenced by
+ %% any allocate instruction in the block.
+ case all(fun({allocate,Live,_}) when N < Live -> false;
+ (_) -> true
+ end, Blk) of
+ true -> is_killed(R, Is);
+ false -> false
+ end
+ end;
+is_killed(R, [{block,Blk}|Is]) ->
+ case is_killed(R, Blk) of
+ true -> true;
+ false -> is_killed(R, Is)
+ end;
+is_killed(R, [{set,Ds,Ss,_Op}|Is]) ->
+ case member(R, Ss) of
+ true -> false;
+ false ->
+ case member(R, Ds) of
+ true -> true;
+ false -> is_killed(R, Is)
+ end
+ end;
+is_killed(R, [{case_end,Used}|_]) -> R =/= Used;
+is_killed(R, [{badmatch,Used}|_]) -> R =/= Used;
+is_killed(_, [if_end|_]) -> true;
+is_killed(R, [{func_info,_,_,Ar}|_]) ->
+ case R of
+ {x,X} when X < Ar -> false;
+ _ -> true
+ end;
+is_killed(R, [{kill,R}|_]) -> true;
+is_killed(R, [{kill,_}|Is]) -> is_killed(R, Is);
+is_killed(R, [{bs_init2,_,_,_,_,_,Dst}|Is]) ->
+ if
+ R =:= Dst -> true;
+ true -> is_killed(R, Is)
+ end;
+is_killed(R, [{bs_put_string,_,_}|Is]) -> is_killed(R, Is);
+is_killed({x,R}, [{'%live',Live}|_]) when R >= Live -> true;
+is_killed({x,_}=R, [{'%live',_}|Is]) -> is_killed(R, Is);
+is_killed({x,R}, [{allocate,Live,_}|_]) ->
+ %% Note: To be safe here, we must return either true or false,
+ %% not looking further at the instructions beyond the allocate
+ %% instruction.
+ R >= Live;
+is_killed({x,R}, [{call,Live,_}|_]) when R >= Live -> true;
+is_killed({x,R}, [{call_last,Live,_,_}|_]) when R >= Live -> true;
+is_killed({x,R}, [{call_only,Live,_}|_]) when R >= Live -> true;
+is_killed({x,R}, [{call_ext,Live,_}|_]) when R >= Live -> true;
+is_killed({x,R}, [{call_ext_last,Live,_,_}|_]) when R >= Live -> true;
+is_killed({x,R}, [{call_ext_only,Live,_}|_]) when R >= Live -> true;
+is_killed({x,R}, [return|_]) when R > 0 -> true;
+is_killed(_, _) -> false.
+
+%% is_not_used(Register, [Instruction]) -> true|false
+%% Determine whether a register is not used by the instruction sequence.
+%% If true is returned, it means that the register will not be
+%% referenced directly, but it may be referenced by an allocate
+%% instruction (meaning that it is NOT allowed to contain garbage).
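+%%
+%% The difference shows up at allocate instructions: for {x,0} and
+%% [{allocate,1,_}|Is], is_killed/2 answers false immediately (the
+%% allocate keeps {x,0} alive), while is_not_used/2 keeps scanning Is,
+%% because allocate only requires {x,0} to be initialized, not that
+%% its value is needed afterwards.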
+
+is_not_used(R, [{block,Blk}|Is]) ->
+ case is_not_used(R, Blk) of
+ true -> true;
+ false -> is_not_used(R, Is)
+ end;
+is_not_used({x,R}=Reg, [{allocate,Live,_}|Is]) ->
+ if
+ R >= Live -> true;
+ true -> is_not_used(Reg, Is)
+ end;
+is_not_used(R, [{set,Ds,Ss,_Op}|Is]) ->
+ case member(R, Ss) of
+ true -> false;
+ false ->
+ case member(R, Ds) of
+ true -> true;
+ false -> is_not_used(R, Is)
+ end
+ end;
+is_not_used(R, Is) -> is_killed(R, Is).
+
+%% opt_alloc(Instructions) -> Instructions'
+%% Optimises all allocate instructions.
+
+opt_alloc([{allocate,R,{_,Ns,Nh,[]}}|Is]) ->
+ [opt_alloc(Is, Ns, Nh, R)|opt(Is)];
+opt_alloc([I|Is]) -> [I|opt_alloc(Is)];
+opt_alloc([]) -> [].
+
+%% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> Instr
+%% Generates the optimal sequence of instructions for
+%% allocating and initializing the stack frame and needed heap.
+
+opt_alloc(_Is, nostack, Nh, LivingRegs) ->
+ {allocate,LivingRegs,{nozero,nostack,Nh,[]}};
+opt_alloc(Is, Ns, Nh, LivingRegs) ->
+ InitRegs = init_yreg(Is, 0),
+ case count_ones(InitRegs) of
+ N when N*2 > Ns ->
+ {allocate,LivingRegs,{nozero,Ns,Nh,gen_init(Ns, InitRegs)}};
+ _ ->
+ {allocate,LivingRegs,{zero,Ns,Nh,[]}}
+ end.
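+
+%% A worked instance of the heuristic above (sizes invented): with
+%% Ns = 4 stack slots of which the code below the allocate initializes
+%% y0..y2 (InitRegs = 2#0111), count_ones/1 gives 3 and 3*2 > 4, so
+%% the result is {allocate,Live,{nozero,4,Nh,[{init,{y,3}}]}} instead
+%% of zeroing all four slots.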
+
+gen_init(Fs, Regs) -> gen_init(Fs, Regs, 0, []).
+
+gen_init(SameFs, _Regs, SameFs, Acc) -> reverse(Acc);
+gen_init(Fs, Regs, Y, Acc) when Regs band 1 == 0 ->
+ gen_init(Fs, Regs bsr 1, Y+1, [{init, {y,Y}}|Acc]);
+gen_init(Fs, Regs, Y, Acc) ->
+ gen_init(Fs, Regs bsr 1, Y+1, Acc).
+
+%% init_yreg(Instructions, RegSet) -> RegSetInitialized
+%% Calculate the set of initialized y registers.
+
+init_yreg([{set,_,_,{bif,_,_}}|_], Reg) -> Reg;
+init_yreg([{set,Ds,_,_}|Is], Reg) -> init_yreg(Is, add_yregs(Ds, Reg));
+init_yreg(_Is, Reg) -> Reg.
+
+add_yregs(Ys, Reg) -> foldl(fun(Y, R0) -> add_yreg(Y, R0) end, Reg, Ys).
+
+add_yreg({y,Y}, Reg) -> Reg bor (1 bsl Y);
+add_yreg(_, Reg) -> Reg.
+
+count_ones(Bits) -> count_ones(Bits, 0).
+count_ones(0, Acc) -> Acc;
+count_ones(Bits, Acc) ->
+ count_ones(Bits bsr 1, Acc + (Bits band 1)).
+
+%% live_at_entry(Is) -> NumberOfRegisters
+%% Calculate the number of registers live at the entry to the code
+%% sequence.
+
+live_at_entry([{block,[{allocate,R,_}|_]}|_]) ->
+ R;
+live_at_entry([{label,_}|Is]) ->
+ live_at_entry(Is);
+live_at_entry([{block,Bl}|_]) ->
+ live_at_entry(Bl);
+live_at_entry([{func_info,_,_,Ar}|_]) ->
+ Ar;
+live_at_entry(Is0) ->
+ case reverse(Is0) of
+ [{'%live',Regs}|Is] -> live_at_entry_1(Is, (1 bsl Regs)-1);
+ _ -> unknown
+ end.
+
+live_at_entry_1([{set,Ds,Ss,_}|Is], Rset0) ->
+ Rset = x_live(Ss, x_dead(Ds, Rset0)),
+ live_at_entry_1(Is, Rset);
+live_at_entry_1([{allocate,_,_}|Is], Rset) ->
+ live_at_entry_1(Is, Rset);
+live_at_entry_1([], Rset) -> live_regs_1(0, Rset).
+
+%% Calculate the new number of live registers when we move an allocate
+%% instruction upwards, passing a 'set' instruction.
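+%% E.g. live_regs([{x,2}], [{x,0},{x,1}], 3) is 2: the destination
+%% {x,2} is dead before the set, leaving only {x,0} and {x,1} live.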
+
+live_regs(Ds, Ss, Regs0) ->
+ Rset = x_live(Ss, x_dead(Ds, (1 bsl Regs0)-1)),
+ live_regs_1(0, Rset).
+
+live_regs_1(N, 0) -> N;
+live_regs_1(N, Regs) -> live_regs_1(N+1, Regs bsr 1).
+
+x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N)));
+x_dead([_|Rs], Regs) -> x_dead(Rs, Regs);
+x_dead([], Regs) -> Regs.
+
+x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N));
+x_live([_|Rs], Regs) -> x_live(Rs, Regs);
+x_live([], Regs) -> Regs.
+
+%%
+%% If a floating point literal occurs more than once, move it into
+%% a free register and re-use it.
+%%
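+%% Sketch (values invented): if {float,2.5} is the source of two
+%% different set instructions and the highest x register used in the
+%% block is {x,2}, the literal is loaded once into the free {x,3} and
+%% both instructions are rewritten to take {x,3} as their source.
+%%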
+
+share_floats([{allocate,_,_}=Alloc|Is]) ->
+ [Alloc|share_floats(Is)];
+share_floats(Is0) ->
+ All = get_floats(Is0, []),
+ MoreThanOnce0 = more_than_once(sort(All), gb_sets:empty()),
+ case gb_sets:is_empty(MoreThanOnce0) of
+ true -> Is0;
+ false ->
+ MoreThanOnce = gb_sets:to_list(MoreThanOnce0),
+ FreeX = highest_used(Is0, -1) + 1,
+ Regs0 = make_reg_map(MoreThanOnce, FreeX, []),
+ Regs = gb_trees:from_orddict(Regs0),
+ Is = map(fun({set,Ds,[{float,F}],Op}=I) ->
+ case gb_trees:lookup(F, Regs) of
+ none -> I;
+ {value,R} -> {set,Ds,[R],Op}
+ end;
+ (I) -> I
+ end, Is0),
+ [{set,[R],[{float,F}],move} || {F,R} <- Regs0] ++ Is
+ end.
+
+get_floats([{set,_,[{float,F}],_}|Is], Acc) ->
+ get_floats(Is, [F|Acc]);
+get_floats([_|Is], Acc) ->
+ get_floats(Is, Acc);
+get_floats([], Acc) -> Acc.
+
+more_than_once([F,F|Fs], Set) ->
+ more_than_once(Fs, gb_sets:add(F, Set));
+more_than_once([_|Fs], Set) ->
+ more_than_once(Fs, Set);
+more_than_once([], Set) -> Set.
+
+highest_used([{set,Ds,Ss,_}|Is], High) ->
+ highest_used(Is, highest(Ds, highest(Ss, High)));
+highest_used([{'%live',Live}|Is], High) when Live > High ->
+ highest_used(Is, Live);
+highest_used([_|Is], High) ->
+ highest_used(Is, High);
+highest_used([], High) -> High.
+
+highest([{x,R}|Rs], High) when R > High ->
+ highest(Rs, R);
+highest([_|Rs], High) ->
+ highest(Rs, High);
+highest([], High) -> High.
+
+make_reg_map([F|Fs], R, Acc) when R < ?MAXREG ->
+ make_reg_map(Fs, R+1, [{F,{x,R}}|Acc]);
+make_reg_map(_, _, Acc) -> sort(Acc).
+
+%% inverse_comp_op(Op) -> none|RevOp
+
+inverse_comp_op('=:=') -> '=/=';
+inverse_comp_op('=/=') -> '=:=';
+inverse_comp_op('==') -> '/=';
+inverse_comp_op('/=') -> '==';
+inverse_comp_op('>') -> '=<';
+inverse_comp_op('<') -> '>=';
+inverse_comp_op('>=') -> '<';
+inverse_comp_op('=<') -> '>';
+inverse_comp_op(_) -> none.
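+
+%% This is what allows opt/1 above to fuse a comparison with a
+%% following 'not': e.g. a '<' test whose result is negated becomes a
+%% single '>=' test with the same failure label.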
+
+%%%
+%%% Evaluation of constant bit fields.
+%%%
+
+is_bs_put({bs_put_integer,_,_,_,_,_}) -> true;
+is_bs_put({bs_put_float,_,_,_,_,_}) -> true;
+is_bs_put(_) -> false.
+
+collect_bs_puts(Is) ->
+ collect_bs_puts_1(Is, []).
+
+collect_bs_puts_1([I|Is]=Is0, Acc) ->
+ case is_bs_put(I) of
+ false -> {reverse(Acc),Is0};
+ true -> collect_bs_puts_1(Is, [I|Acc])
+ end;
+collect_bs_puts_1([], Acc) -> {reverse(Acc),[]}.
+
+opt_bs_puts(Is) ->
+ opt_bs_1(Is, []).
+
+opt_bs_1([{bs_put_float,Fail,{integer,Sz},1,Flags0,Src}=I0|Is], Acc) ->
+ case catch eval_put_float(Src, Sz, Flags0) of
+ {'EXIT',_} ->
+ opt_bs_1(Is, [I0|Acc]);
+ <<Int:Sz>> ->
+ Flags = force_big(Flags0),
+ I = {bs_put_integer,Fail,{integer,Sz},1,Flags,{integer,Int}},
+ opt_bs_1([I|Is], Acc)
+ end;
+opt_bs_1([{bs_put_integer,_,{integer,8},1,_,{integer,_}}|_]=IsAll, Acc0) ->
+ {Is,Acc} = bs_collect_string(IsAll, Acc0),
+ opt_bs_1(Is, Acc);
+opt_bs_1([{bs_put_integer,Fail,{integer,Sz},1,F,{integer,N}}=I|Is0], Acc) when Sz > 8 ->
+ case field_endian(F) of
+ big ->
+ case bs_split_int(N, Sz, Fail, Is0) of
+ no_split -> opt_bs_1(Is0, [I|Acc]);
+ Is -> opt_bs_1(Is, Acc)
+ end;
+ little ->
+ case catch <<N:Sz/little>> of
+ {'EXIT',_} ->
+ opt_bs_1(Is0, [I|Acc]);
+ <<Int:Sz>> ->
+ Flags = force_big(F),
+ Is = [{bs_put_integer,Fail,{integer,Sz},1,
+ Flags,{integer,Int}}|Is0],
+ opt_bs_1(Is, Acc)
+ end;
+ native -> opt_bs_1(Is0, [I|Acc])
+ end;
+opt_bs_1([{Op,Fail,{integer,Sz},U,F,Src}|Is], Acc) when U > 1 ->
+ opt_bs_1([{Op,Fail,{integer,U*Sz},1,F,Src}|Is], Acc);
+opt_bs_1([I|Is], Acc) ->
+ opt_bs_1(Is, [I|Acc]);
+opt_bs_1([], Acc) -> reverse(Acc).
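+
+%% Two examples of the rewrites above (operands invented): a pair of
+%% byte-sized literal puts
+%%     {bs_put_integer,F,{integer,8},1,Fl,{integer,$h}},
+%%     {bs_put_integer,F,{integer,8},1,Fl,{integer,$i}}
+%% collapses into {bs_put_string,2,{string,"hi"}}, while a literal
+%% big-endian put wider than one byte is split into byte-sized puts
+%% by bs_split_int/4 below.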
+
+eval_put_float(Src, Sz, Flags) ->
+ Val = value(Src),
+ case field_endian(Flags) of
+ little -> <<Val:Sz/little-float-unit:1>>;
+ big -> <<Val:Sz/big-float-unit:1>>
+ %% native intentionally not handled here - we can't optimize it.
+ end.
+
+value({integer,I}) -> I;
+value({float,F}) -> F;
+value({atom,A}) -> A.
+
+bs_collect_string(Is, [{bs_put_string,Len,{string,Str}}|Acc]) ->
+ bs_coll_str_1(Is, Len, reverse(Str), Acc);
+bs_collect_string(Is, Acc) ->
+ bs_coll_str_1(Is, 0, [], Acc).
+
+bs_coll_str_1([{bs_put_integer,_,{integer,Sz},U,_,{integer,V}}|Is],
+ Len, StrAcc, IsAcc) when U*Sz =:= 8 ->
+ Byte = V band 16#FF,
+ bs_coll_str_1(Is, Len+1, [Byte|StrAcc], IsAcc);
+bs_coll_str_1(Is, Len, StrAcc, IsAcc) ->
+ {Is,[{bs_put_string,Len,{string,reverse(StrAcc)}}|IsAcc]}.
+
+field_endian({field_flags,F}) -> field_endian_1(F).
+
+field_endian_1([big=E|_]) -> E;
+field_endian_1([little=E|_]) -> E;
+field_endian_1([native=E|_]) -> E;
+field_endian_1([_|Fs]) -> field_endian_1(Fs).
+
+force_big({field_flags,F}) ->
+ {field_flags,force_big_1(F)}.
+
+force_big_1([big|_]=Fs) -> Fs;
+force_big_1([little|Fs]) -> [big|Fs];
+force_big_1([F|Fs]) -> [F|force_big_1(Fs)].
+
+bs_split_int(0, Sz, _, _) when Sz > 64 ->
+ %% We don't want to split in this case because the
+ %% string will consist of only zeroes.
+ no_split;
+bs_split_int(N, Sz, Fail, Acc) ->
+ FirstByteSz = case Sz rem 8 of
+ 0 -> 8;
+ Rem -> Rem
+ end,
+ bs_split_int_1(N, FirstByteSz, Sz, Fail, Acc).
+
+bs_split_int_1(N, ByteSz, Sz, Fail, Acc) when Sz > 0 ->
+ Mask = (1 bsl ByteSz) - 1,
+ I = {bs_put_integer,Fail,{integer,ByteSz},1,
+ {field_flags,[big]},{integer,N band Mask}},
+ bs_split_int_1(N bsr ByteSz, 8, Sz-ByteSz, Fail, [I|Acc]);
+bs_split_int_1(_, _, _, _, Acc) -> Acc.
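+
+%% E.g. bs_split_int(16#ABC, 12, Fail, Is) prepends an 8-bit put of
+%% 16#AB and then a 4-bit put of 16#C to Is: 12 rem 8 = 4, so the low
+%% four bits form the final odd-sized chunk and the remaining bits
+%% are emitted in whole bytes, most significant first.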