diff options
author | Björn Gustavsson <[email protected]> | 2012-10-10 16:00:04 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2012-10-10 16:00:04 +0200 |
commit | b16c127d4dc2bb070d6e98e5c4cdc38220708a13 (patch) | |
tree | 60e0fe128f9fe6d36354952d70f06463683a112d /lib/compiler/src | |
parent | 55358002ddfa9f19e4142677d1ad4bd0a1760c0a (diff) | |
parent | e1aa422290b09a68bd761cbd0e70bd48442009b3 (diff) | |
download | otp-b16c127d4dc2bb070d6e98e5c4cdc38220708a13.tar.gz otp-b16c127d4dc2bb070d6e98e5c4cdc38220708a13.tar.bz2 otp-b16c127d4dc2bb070d6e98e5c4cdc38220708a13.zip |
Merge branch 'bjorn/compiler/minor-optimization-polishing/OTP-10193'
* bjorn/compiler/minor-optimization-polishing/OTP-10193: (25 commits)
beam_bsm: Handle calls slightly better
Break apart tail-recursive call instructions
Represent the 'send' instruction as a call_ext/2 instruction
Rewrite select_val and select_tuple_arity to a select instruction
Rewrite binary creation instructions to bs_init instructions
Rewrite bs_add, bs_utf*_size to BIF instructions in optimizations
Rewrite bs_put* instructions to a generic bs_put instruction
Refactor removal of unused labels
Introduce the mandatory beam_a and beam_z passes
compile: Fix bug in selection of passes
beam_receive: Optimize receives using refs created by spawn_monitor/{1,3}
compile: Give a friendler error message if a parse transform cannot be found
beam_jump: Don't move a block which can be entered via a fallthrough
beam_jump: Fix broken optimization
v3_kernel: Fix match code for matched out segment size in multiple clauses
Improve binary matching of literals
v3_codegen: Combine adjacent bs_match_string instructions
beam_bool: Recognize more safe optimizations
beam_utils: Correct usage calculations for GC BIFs in blocks
beam_utils:live_opt/1: Correct liveness calculation for 'try'
...
Diffstat (limited to 'lib/compiler/src')
-rw-r--r-- | lib/compiler/src/Makefile | 2 | ||||
-rw-r--r-- | lib/compiler/src/beam_a.erl | 97 | ||||
-rw-r--r-- | lib/compiler/src/beam_block.erl | 77 | ||||
-rw-r--r-- | lib/compiler/src/beam_bool.erl | 41 | ||||
-rw-r--r-- | lib/compiler/src/beam_bsm.erl | 91 | ||||
-rw-r--r-- | lib/compiler/src/beam_clean.erl | 45 | ||||
-rw-r--r-- | lib/compiler/src/beam_dead.erl | 16 | ||||
-rw-r--r-- | lib/compiler/src/beam_except.erl | 4 | ||||
-rw-r--r-- | lib/compiler/src/beam_flatten.erl | 53 | ||||
-rw-r--r-- | lib/compiler/src/beam_jump.erl | 125 | ||||
-rw-r--r-- | lib/compiler/src/beam_peep.erl | 8 | ||||
-rw-r--r-- | lib/compiler/src/beam_receive.erl | 54 | ||||
-rw-r--r-- | lib/compiler/src/beam_trim.erl | 68 | ||||
-rw-r--r-- | lib/compiler/src/beam_utils.erl | 266 | ||||
-rw-r--r-- | lib/compiler/src/beam_z.erl | 79 | ||||
-rw-r--r-- | lib/compiler/src/compile.erl | 27 | ||||
-rw-r--r-- | lib/compiler/src/compiler.app.src | 2 | ||||
-rw-r--r-- | lib/compiler/src/v3_codegen.erl | 100 | ||||
-rw-r--r-- | lib/compiler/src/v3_kernel.erl | 53 |
19 files changed, 615 insertions, 593 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile index 958d3501c7..cbcbf79839 100644 --- a/lib/compiler/src/Makefile +++ b/lib/compiler/src/Makefile @@ -45,6 +45,7 @@ RELSYSDIR = $(RELEASE_PATH)/lib/compiler-$(VSN) # Target Specs # ---------------------------------------------------- MODULES = \ + beam_a \ beam_asm \ beam_block \ beam_bool \ @@ -65,6 +66,7 @@ MODULES = \ beam_type \ beam_utils \ beam_validator \ + beam_z \ cerl \ cerl_clauses \ cerl_inline \ diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl new file mode 100644 index 0000000000..1c51226314 --- /dev/null +++ b/lib/compiler/src/beam_a.erl @@ -0,0 +1,97 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2012. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Purpose: Run directly after code generation to do any normalization +%% or preparation to simplify the optimization passes. +%% (Mandatory.) + +-module(beam_a). + +-export([module/2]). + +module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> + Fs = [function(F) || F <- Fs0], + {ok,{Mod,Exp,Attr,Fs,Lc}}. + +function({function,Name,Arity,CLabel,Is0}) -> + try + %% Rename certain operations to simplify the optimization passes. + Is1 = rename_instrs(Is0), + + %% Remove unusued labels for cleanliness and to help + %% optimization passes and HiPE. + Is = beam_jump:remove_unused_labels(Is1), + {function,Name,Arity,CLabel,Is} + catch + Class:Error -> + Stack = erlang:get_stacktrace(), + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +rename_instrs([{apply_last,A,N}|Is]) -> + [{apply,A},{deallocate,N},return|rename_instrs(Is)]; +rename_instrs([{call_last,A,F,N}|Is]) -> + [{call,A,F},{deallocate,N},return|rename_instrs(Is)]; +rename_instrs([{call_ext_last,A,F,N}|Is]) -> + [{call_ext,A,F},{deallocate,N},return|rename_instrs(Is)]; +rename_instrs([{call_only,A,F}|Is]) -> + [{call,A,F},return|rename_instrs(Is)]; +rename_instrs([{call_ext_only,A,F}|Is]) -> + [{call_ext,A,F},return|rename_instrs(Is)]; +rename_instrs([I|Is]) -> + [rename_instr(I)|rename_instrs(Is)]; +rename_instrs([]) -> []. + +rename_instr({bs_put_binary=I,F,Sz,U,Fl,Src}) -> + {bs_put,F,{I,U,Fl},[Sz,Src]}; +rename_instr({bs_put_float=I,F,Sz,U,Fl,Src}) -> + {bs_put,F,{I,U,Fl},[Sz,Src]}; +rename_instr({bs_put_integer=I,F,Sz,U,Fl,Src}) -> + {bs_put,F,{I,U,Fl},[Sz,Src]}; +rename_instr({bs_put_utf8=I,F,Fl,Src}) -> + {bs_put,F,{I,Fl},[Src]}; +rename_instr({bs_put_utf16=I,F,Fl,Src}) -> + {bs_put,F,{I,Fl},[Src]}; +rename_instr({bs_put_utf32=I,F,Fl,Src}) -> + {bs_put,F,{I,Fl},[Src]}; +%% rename_instr({bs_put_string,_,_}=I) -> +%% {bs_put,{f,0},I,[]}; +rename_instr({bs_add=I,F,[Src1,Src2,U],Dst}) when is_integer(U) -> + {bif,I,F,[Src1,Src2,{integer,U}],Dst}; +rename_instr({bs_utf8_size=I,F,Src,Dst}) -> + {bif,I,F,[Src],Dst}; +rename_instr({bs_utf16_size=I,F,Src,Dst}) -> + {bif,I,F,[Src],Dst}; +rename_instr({bs_init2=I,F,Sz,Extra,Live,Flags,Dst}) -> + {bs_init,F,{I,Extra,Flags},Live,[Sz],Dst}; +rename_instr({bs_init_bits=I,F,Sz,Extra,Live,Flags,Dst}) -> + {bs_init,F,{I,Extra,Flags},Live,[Sz],Dst}; +rename_instr({bs_append=I,F,Sz,Extra,Live,U,Src,Flags,Dst}) -> + {bs_init,F,{I,Extra,U,Flags},Live,[Sz,Src],Dst}; +rename_instr({bs_private_append=I,F,Sz,U,Src,Flags,Dst}) -> + {bs_init,F,{I,U,Flags},none,[Sz,Src],Dst}; +rename_instr(bs_init_writable=I) -> + {bs_init,{f,0},I,1,[{x,0}],{x,0}}; +rename_instr({select_val=I,Reg,Fail,{list,List}}) -> + {select,I,Reg,Fail,List}; +rename_instr({select_tuple_arity=I,Reg,Fail,{list,List}}) -> + {select,I,Reg,Fail,List}; +rename_instr(send) -> + {call_ext,2,send}; +rename_instr(I) -> I. diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index cd568097fa..3e0050382c 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -31,19 +31,16 @@ module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) -> function({function,Name,Arity,CLabel,Is0}, Lc0) -> try - %% Extra labels may thwart optimizations. - Is1 = beam_jump:remove_unused_labels(Is0), - %% Collect basic blocks and optimize them. - Is2 = blockify(Is1), - Is3 = embed_lines(Is2), - Is4 = move_allocates(Is3), - Is5 = beam_utils:live_opt(Is4), - Is6 = opt_blocks(Is5), - Is7 = beam_utils:delete_live_annos(Is6), + Is1 = blockify(Is0), + Is2 = embed_lines(Is1), + Is3 = move_allocates(Is2), + Is4 = beam_utils:live_opt(Is3), + Is5 = opt_blocks(Is4), + Is6 = beam_utils:delete_live_annos(Is5), %% Optimize bit syntax. - {Is,Lc} = bsm_opt(Is7, Lc0), + {Is,Lc} = bsm_opt(Is6, Lc0), %% Done. {{function,Name,Arity,CLabel,Is},Lc} @@ -74,9 +71,9 @@ blockify([{bs_save2,R,Point}=I,{test,is_eq_exact,_,_}=Test, %% Do other peep-hole optimizations. blockify([{test,is_atom,{f,Fail},[Reg]}=I| - [{select_val,Reg,{f,Fail}, - {list,[{atom,false},{f,_}=BrFalse, - {atom,true}=AtomTrue,{f,_}=BrTrue]}}|Is]=Is0], + [{select,select_val,Reg,{f,Fail}, + [{atom,false},{f,_}=BrFalse, + {atom,true}=AtomTrue,{f,_}=BrTrue]}|Is]=Is0], [{block,Bl}|_]=Acc) -> case is_last_bool(Bl, Reg) of false -> @@ -89,9 +86,9 @@ blockify([{test,is_atom,{f,Fail},[Reg]}=I| {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc]) end; blockify([{test,is_atom,{f,Fail},[Reg]}=I| - [{select_val,Reg,{f,Fail}, - {list,[{atom,true}=AtomTrue,{f,_}=BrTrue, - {atom,false},{f,_}=BrFalse]}}|Is]=Is0], + [{select,select_val,Reg,{f,Fail}, + [{atom,true}=AtomTrue,{f,_}=BrTrue, + {atom,false},{f,_}=BrFalse]}|Is]=Is0], [{block,Bl}|_]=Acc) -> case is_last_bool(Bl, Reg) of false -> @@ -423,8 +420,8 @@ inverse_comp_op(_) -> none. %%% Evaluation of constant bit fields. %%% -is_bs_put({bs_put_integer,_,_,_,_,_}) -> true; -is_bs_put({bs_put_float,_,_,_,_,_}) -> true; +is_bs_put({bs_put,_,{bs_put_integer,_,_},_}) -> true; +is_bs_put({bs_put,_,{bs_put_float,_,_},_}) -> true; is_bs_put(_) -> false. collect_bs_puts(Is) -> @@ -439,20 +436,24 @@ collect_bs_puts_1([I|Is]=Is0, Acc) -> opt_bs_puts(Is) -> opt_bs_1(Is, []). -opt_bs_1([{bs_put_float,Fail,{integer,Sz},1,Flags0,Src}=I0|Is], Acc) -> +opt_bs_1([{bs_put,Fail, + {bs_put_float,1,Flags0},[{integer,Sz},Src]}=I0|Is], Acc) -> try eval_put_float(Src, Sz, Flags0) of <<Int:Sz>> -> Flags = force_big(Flags0), - I = {bs_put_integer,Fail,{integer,Sz},1,Flags,{integer,Int}}, + I = {bs_put,Fail,{bs_put_integer,1,Flags}, + [{integer,Sz},{integer,Int}]}, opt_bs_1([I|Is], Acc) catch error:_ -> opt_bs_1(Is, [I0|Acc]) end; -opt_bs_1([{bs_put_integer,_,{integer,8},1,_,{integer,_}}|_]=IsAll, Acc0) -> +opt_bs_1([{bs_put,_,{bs_put_integer,1,_},[{integer,8},{integer,_}]}|_]=IsAll, + Acc0) -> {Is,Acc} = bs_collect_string(IsAll, Acc0), opt_bs_1(Is, Acc); -opt_bs_1([{bs_put_integer,Fail,{integer,Sz},1,F,{integer,N}}=I|Is0], Acc) when Sz > 8 -> +opt_bs_1([{bs_put,Fail,{bs_put_integer,1,F},[{integer,Sz},{integer,N}]}=I|Is0], + Acc) when Sz > 8 -> case field_endian(F) of big -> %% We can do this optimization for any field size without risk @@ -466,14 +467,14 @@ opt_bs_1([{bs_put_integer,Fail,{integer,Sz},1,F,{integer,N}}=I|Is0], Acc) when S %% an explosion in code size. <<Int:Sz>> = <<N:Sz/little>>, Flags = force_big(F), - Is = [{bs_put_integer,Fail,{integer,Sz},1, - Flags,{integer,Int}}|Is0], + Is = [{bs_put,Fail,{bs_put_integer,1,Flags}, + [{integer,Sz},{integer,Int}]}|Is0], opt_bs_1(Is, Acc); _ -> %native or too wide little field opt_bs_1(Is0, [I|Acc]) end; -opt_bs_1([{Op,Fail,{integer,Sz},U,F,Src}|Is], Acc) when U > 1 -> - opt_bs_1([{Op,Fail,{integer,U*Sz},1,F,Src}|Is], Acc); +opt_bs_1([{bs_put,Fail,{Op,U,F},[{integer,Sz},Src]}|Is], Acc) when U > 1 -> + opt_bs_1([{bs_put,Fail,{Op,1,F},[{integer,U*Sz},Src]}|Is], Acc); opt_bs_1([I|Is], Acc) -> opt_bs_1(Is, [I|Acc]); opt_bs_1([], Acc) -> reverse(Acc). @@ -489,17 +490,17 @@ eval_put_float(Src, Sz, Flags) when Sz =< 256 -> %Only evaluate if Sz is reasona value({integer,I}) -> I; value({float,F}) -> F. -bs_collect_string(Is, [{bs_put_string,Len,{string,Str}}|Acc]) -> +bs_collect_string(Is, [{bs_put,_,{bs_put_string,Len,{string,Str}},[]}|Acc]) -> bs_coll_str_1(Is, Len, reverse(Str), Acc); bs_collect_string(Is, Acc) -> bs_coll_str_1(Is, 0, [], Acc). -bs_coll_str_1([{bs_put_integer,_,{integer,Sz},U,_,{integer,V}}|Is], +bs_coll_str_1([{bs_put,_,{bs_put_integer,U,_},[{integer,Sz},{integer,V}]}|Is], Len, StrAcc, IsAcc) when U*Sz =:= 8 -> Byte = V band 16#FF, bs_coll_str_1(Is, Len+1, [Byte|StrAcc], IsAcc); bs_coll_str_1(Is, Len, StrAcc, IsAcc) -> - {Is,[{bs_put_string,Len,{string,reverse(StrAcc)}}|IsAcc]}. + {Is,[{bs_put,{f,0},{bs_put_string,Len,{string,reverse(StrAcc)}},[]}|IsAcc]}. field_endian({field_flags,F}) -> field_endian_1(F). @@ -531,15 +532,17 @@ bs_split_int(N, Sz, Fail, Acc) -> bs_split_int_1(N, FirstByteSz, Sz, Fail, Acc). bs_split_int_1(-1, _, Sz, Fail, Acc) when Sz > 64 -> - I = {bs_put_integer,Fail,{integer,Sz},1,{field_flags,[big]},{integer,-1}}, + I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}}, + [{integer,Sz},{integer,-1}]}, [I|Acc]; bs_split_int_1(0, _, Sz, Fail, Acc) when Sz > 64 -> - I = {bs_put_integer,Fail,{integer,Sz},1,{field_flags,[big]},{integer,0}}, + I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}}, + [{integer,Sz},{integer,0}]}, [I|Acc]; bs_split_int_1(N, ByteSz, Sz, Fail, Acc) when Sz > 0 -> Mask = (1 bsl ByteSz) - 1, - I = {bs_put_integer,Fail,{integer,ByteSz},1, - {field_flags,[big]},{integer,N band Mask}}, + I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}}, + [{integer,ByteSz},{integer,N band Mask}]}, bs_split_int_1(N bsr ByteSz, 8, Sz-ByteSz, Fail, [I|Acc]); bs_split_int_1(_, _, _, _, Acc) -> Acc. @@ -577,9 +580,9 @@ bsm_reroute([{bs_restore2,Reg,Save}=I|Is], D, _, Acc) -> bsm_reroute(Is, D, {Reg,Save}, [I|Acc]); bsm_reroute([{label,_}=I|Is], D, S, Acc) -> bsm_reroute(Is, D, S, [I|Acc]); -bsm_reroute([{select_val,Reg,F0,{list,Lbls0}}|Is], D, {_,Save}=S, Acc0) -> +bsm_reroute([{select,select_val,Reg,F0,Lbls0}|Is], D, {_,Save}=S, Acc0) -> [F|Lbls] = bsm_subst_labels([F0|Lbls0], Save, D), - Acc = [{select_val,Reg,F,{list,Lbls}}|Acc0], + Acc = [{select,select_val,Reg,F,Lbls}|Acc0], bsm_reroute(Is, D, S, Acc); bsm_reroute([{test,TestOp,F0,TestArgs}=I|Is], D, {_,Save}=S, Acc0) -> F = bsm_subst_label(F0, Save, D), @@ -615,10 +618,6 @@ bsm_opt_2([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,_]}|Is], [{test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,Flags]}|Acc]) -> bsm_opt_2(Is, [{test,bs_skip_bits2,F, [Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]); -bsm_opt_2([{test,bs_match_string,F,[Ctx,Bin1]}, - {test,bs_match_string,F,[Ctx,Bin2]}|Is], Acc) -> - I = {test,bs_match_string,F,[Ctx,<<Bin1/bitstring,Bin2/bitstring>>]}, - bsm_opt_2([I|Is], Acc); bsm_opt_2([I|Is], Acc) -> bsm_opt_2(Is, [I|Acc]); bsm_opt_2([], Acc) -> reverse(Acc). diff --git a/lib/compiler/src/beam_bool.erl b/lib/compiler/src/beam_bool.erl index d9ea6f5a70..81be262d6d 100644 --- a/lib/compiler/src/beam_bool.erl +++ b/lib/compiler/src/beam_bool.erl @@ -168,18 +168,18 @@ bopt_block(Reg, Fail, OldIs, [{block,Bl0}|Acc0], St0) -> end. %% ensure_opt_safe(OriginalCode, OptCode, FollowingCode, Fail, -%% ReversedPreceedingCode, State) -> ok +%% ReversedPrecedingCode, State) -> ok %% Comparing the original code to the optimized code, determine %% whether the optimized code is guaranteed to work in the same %% way as the original code. %% %% Throw an exception if the optimization is not safe. %% -ensure_opt_safe(Bl, NewCode, OldIs, Fail, PreceedingCode, St) -> +ensure_opt_safe(Bl, NewCode, OldIs, Fail, PrecedingCode, St) -> %% Here are the conditions that must be true for the %% optimization to be safe. %% - %% 1. If a register is INITIALIZED by PreceedingCode, + %% 1. If a register is INITIALIZED by PrecedingCode, %% then if that register assigned a value in the original %% code, but not in the optimized code, it must be UNUSED or KILLED %% in the code that follows. @@ -190,29 +190,50 @@ ensure_opt_safe(Bl, NewCode, OldIs, Fail, PreceedingCode, St) -> %% by the code that follows. %% %% 3. Any register that is assigned a value in the optimized - %% code must be UNUSED or KILLED in the following code - %% (because the register might be assigned the wrong value, - %% and even if the value is right it might no longer be - %% assigned on *all* paths leading to its use). + %% code must be UNUSED or KILLED in the following code, + %% unless we can be sure that it is always assigned the same + %% value. - InitInPreceeding = initialized_regs(PreceedingCode), + InitInPreceding = initialized_regs(PrecedingCode), PrevDst = dst_regs(Bl), NewDst = dst_regs(NewCode), NotSet = ordsets:subtract(PrevDst, NewDst), - MustBeKilled = ordsets:subtract(NotSet, InitInPreceeding), - MustBeUnused = ordsets:subtract(ordsets:union(NotSet, NewDst), MustBeKilled), + MustBeKilled = ordsets:subtract(NotSet, InitInPreceding), case all_killed(MustBeKilled, OldIs, Fail, St) of false -> throw(all_registers_not_killed); true -> ok end, + Same = assigned_same_value(Bl, NewCode), + MustBeUnused = ordsets:subtract(ordsets:union(NotSet, NewDst), + ordsets:union(MustBeKilled, Same)), case none_used(MustBeUnused, OldIs, Fail, St) of false -> throw(registers_used); true -> ok end, ok. +%% assigned_same_value(OldCode, NewCodeReversed) -> [DestinationRegs] +%% Return an ordset with a list of all y registers that are always +%% assigned the same value in the old and new code. Currently, we +%% are very conservative in that we only consider identical move +%% instructions in the same order. +%% +assigned_same_value(Old, New) -> + case reverse(New) of + [{block,Bl}|_] -> + assigned_same_value(Old, Bl, []); + _ -> + ordsets:new() + end. + +assigned_same_value([{set,[{y,_}=D],[S],move}|T1], + [{set,[{y,_}=D],[S],move}|T2], Acc) -> + assigned_same_value(T1, T2, [D|Acc]); +assigned_same_value(_, _, Acc) -> + ordsets:from_list(Acc). + update_fail_label([{set,_,_,move}=I|Is], Fail, Acc) -> update_fail_label(Is, Fail, [I|Acc]); update_fail_label([{set,Ds,As,{bif,N,{f,_}}}|Is], Fail, Acc) -> diff --git a/lib/compiler/src/beam_bsm.erl b/lib/compiler/src/beam_bsm.erl index 1217f7f777..37053e1cc4 100644 --- a/lib/compiler/src/beam_bsm.erl +++ b/lib/compiler/src/beam_bsm.erl @@ -204,16 +204,6 @@ btb_reaches_match_1(Is, Regs, D) -> btb_reaches_match_2([{block,Bl}|Is], Regs0, D) -> Regs = btb_reaches_match_block(Bl, Regs0), btb_reaches_match_1(Is, Regs, D); -btb_reaches_match_2([{call_only,Arity,{f,Lbl}}|_], Regs0, D) -> - Regs = btb_kill_not_live(Arity, Regs0), - btb_tail_call(Lbl, Regs, D); -btb_reaches_match_2([{call_ext_only,Arity,Func}|_], Regs0, D) -> - Regs = btb_kill_not_live(Arity, Regs0), - btb_tail_call(Func, Regs, D); -btb_reaches_match_2([{call_last,Arity,{f,Lbl},_}|_], Regs0, D) -> - Regs1 = btb_kill_not_live(Arity, Regs0), - Regs = btb_kill_yregs(Regs1), - btb_tail_call(Lbl, Regs, D); btb_reaches_match_2([{call,Arity,{f,Lbl}}|Is], Regs, D) -> btb_call(Arity, Lbl, Regs, Is, D); btb_reaches_match_2([{apply,Arity}|Is], Regs, D) -> @@ -222,19 +212,16 @@ btb_reaches_match_2([{call_fun,Live}=I|Is], Regs, D) -> btb_call(Live, I, Regs, Is, D); btb_reaches_match_2([{make_fun2,_,_,_,Live}|Is], Regs, D) -> btb_call(Live, make_fun2, Regs, Is, D); -btb_reaches_match_2([{call_ext,Arity,{extfunc,Mod,Name,Arity}=Func}|Is], Regs0, D) -> +btb_reaches_match_2([{call_ext,Arity,Func}=I|Is], Regs0, D) -> %% Allow us scanning beyond the call in case the match %% context is saved on the stack. - case erl_bifs:is_exit_bif(Mod, Name, Arity) of + case beam_jump:is_exit_instruction(I) of false -> btb_call(Arity, Func, Regs0, Is, D); true -> Regs = btb_kill_not_live(Arity, Regs0), btb_tail_call(Func, Regs, D) end; -btb_reaches_match_2([{call_ext_last,Arity,_,_}=I|_], Regs, D) -> - btb_ensure_not_used(btb_regs_from_arity(Arity), I, Regs), - D; btb_reaches_match_2([{kill,Y}|Is], Regs, D) -> btb_reaches_match_1(Is, btb_kill([Y], Regs), D); btb_reaches_match_2([{deallocate,_}|Is], Regs0, D) -> @@ -278,12 +265,7 @@ btb_reaches_match_2([{test,_,{f,F},_,Ss,_}=I|Is], Regs, D0) -> btb_ensure_not_used(Ss, I, Regs), D = btb_follow_branch(F, Regs, D0), btb_reaches_match_1(Is, Regs, D); -btb_reaches_match_2([{select_val,Src,{f,F},{list,Conds}}=I|Is], Regs, D0) -> - btb_ensure_not_used([Src], I, Regs), - D1 = btb_follow_branch(F, Regs, D0), - D = btb_follow_branches(Conds, Regs, D1), - btb_reaches_match_1(Is, Regs, D); -btb_reaches_match_2([{select_tuple_arity,Src,{f,F},{list,Conds}}=I|Is], Regs, D0) -> +btb_reaches_match_2([{select,_,Src,{f,F},Conds}=I|Is], Regs, D0) -> btb_ensure_not_used([Src], I, Regs), D1 = btb_follow_branch(F, Regs, D0), D = btb_follow_branches(Conds, Regs, D1), @@ -293,46 +275,11 @@ btb_reaches_match_2([{jump,{f,Lbl}}|_], Regs, #btb{index=Li}=D) -> btb_reaches_match_2(Is, Regs, D); btb_reaches_match_2([{label,_}|Is], Regs, D) -> btb_reaches_match_2(Is, Regs, D); -btb_reaches_match_2([{bs_add,{f,0},_,Dst}|Is], Regs, D) -> - btb_reaches_match_1(Is, btb_kill([Dst], Regs), D); -btb_reaches_match_2([bs_init_writable|Is], Regs0, D) -> - Regs = btb_kill_not_live(0, Regs0), - btb_reaches_match_1(Is, Regs, D); -btb_reaches_match_2([{bs_init2,{f,0},_,_,_,_,Dst}|Is], Regs, D) -> - btb_reaches_match_1(Is, btb_kill([Dst], Regs), D); -btb_reaches_match_2([{bs_init_bits,{f,0},_,_,_,_,Dst}|Is], Regs, D) -> - btb_reaches_match_1(Is, btb_kill([Dst], Regs), D); -btb_reaches_match_2([{bs_append,{f,0},_,_,_,_,Src,_,Dst}=I|Is], Regs, D) -> - btb_ensure_not_used([Src], I, Regs), - btb_reaches_match_1(Is, btb_kill([Dst], Regs), D); -btb_reaches_match_2([{bs_private_append,{f,0},_,_,Src,_,Dst}=I|Is], Regs, D) -> - btb_ensure_not_used([Src], I, Regs), - btb_reaches_match_1(Is, btb_kill([Dst], Regs), D); -btb_reaches_match_2([{bs_put_integer,{f,0},_,_,_,Src}=I|Is], Regs, D) -> - btb_ensure_not_used([Src], I, Regs), - btb_reaches_match_1(Is, Regs, D); -btb_reaches_match_2([{bs_put_float,{f,0},_,_,_,Src}=I|Is], Regs, D) -> - btb_ensure_not_used([Src], I, Regs), - btb_reaches_match_1(Is, Regs, D); -btb_reaches_match_2([{bs_put_binary,{f,0},_,_,_,Src}=I|Is], Regs, D) -> - btb_ensure_not_used([Src], I, Regs), - btb_reaches_match_1(Is, Regs, D); -btb_reaches_match_2([{bs_put_string,_,_}|Is], Regs, D) -> - btb_reaches_match_1(Is, Regs, D); -btb_reaches_match_2([{bs_utf8_size,_,Src,Dst}=I|Is], Regs, D) -> - btb_ensure_not_used([Src], I, Regs), - btb_reaches_match_1(Is, btb_kill([Dst], Regs), D); -btb_reaches_match_2([{bs_utf16_size,_,Src,Dst}=I|Is], Regs, D) -> - btb_ensure_not_used([Src], I, Regs), +btb_reaches_match_2([{bs_init,{f,0},_,_,Ss,Dst}=I|Is], Regs, D) -> + btb_ensure_not_used(Ss, I, Regs), btb_reaches_match_1(Is, btb_kill([Dst], Regs), D); -btb_reaches_match_2([{bs_put_utf8,_,_,Src}=I|Is], Regs, D) -> - btb_ensure_not_used([Src], I, Regs), - btb_reaches_match_1(Is, Regs, D); -btb_reaches_match_2([{bs_put_utf16,_,_,Src}=I|Is], Regs, D) -> - btb_ensure_not_used([Src], I, Regs), - btb_reaches_match_1(Is, Regs, D); -btb_reaches_match_2([{bs_put_utf32,_,_,Src}=I|Is], Regs, D) -> - btb_ensure_not_used([Src], I, Regs), +btb_reaches_match_2([{bs_put,{f,0},_,Ss}=I|Is], Regs, D) -> + btb_ensure_not_used(Ss, I, Regs), btb_reaches_match_1(Is, Regs, D); btb_reaches_match_2([{bs_restore2,Src,_}=I|Is], Regs0, D) -> case btb_contains_context(Src, Regs0) of @@ -389,13 +336,16 @@ btb_call(Arity, Lbl, Regs0, Is, D0) -> %% First handle the call as if it were a tail call. D = btb_tail_call(Lbl, Regs, D0), - %% No problem so far, but now we must make sure that - %% we don't have any copies of the match context - %% tucked away in an y register. + %% No problem so far (the called function can handle a + %% match context). Now we must make sure that the rest + %% of this function following the call does not attempt + %% to use the match context in case there is a copy + %% tucked away in a y register. RegList = btb_context_regs(Regs), - case [R || {y,_}=R <- RegList] of - [] -> D; - [_|_] -> btb_error({multiple_uses,RegList}) + YRegs = [R || {y,_}=R <- RegList], + case btb_are_all_killed(YRegs, Is, D) of + true -> D; + false -> btb_error({multiple_uses,RegList}) end; true -> %% No match context in any x register. It could have been @@ -475,15 +425,6 @@ btb_reaches_match_block([{set,Ds,Ss,_}=I|Is], Regs0) -> btb_reaches_match_block([], Regs) -> Regs. -%% btb_regs_from_arity(Arity) -> [Register]) -%% Create a list of x registers from a function arity. - -btb_regs_from_arity(Arity) -> - btb_regs_from_arity_1(Arity, []). - -btb_regs_from_arity_1(0, Acc) -> Acc; -btb_regs_from_arity_1(N, Acc) -> btb_regs_from_arity_1(N-1, [{x,N-1}|Acc]). - %% btb_are_all_killed([Register], [Instruction], D) -> true|false %% Test whether all of the register are killed in the instruction stream. diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl index a7994ab3b3..26ba93b91c 100644 --- a/lib/compiler/src/beam_clean.erl +++ b/lib/compiler/src/beam_clean.erl @@ -74,10 +74,6 @@ find_all_used([], _All, Used) -> Used. update_work_list([{call,_,{f,L}}|Is], Sets) -> update_work_list(Is, add_to_work_list(L, Sets)); -update_work_list([{call_last,_,{f,L},_}|Is], Sets) -> - update_work_list(Is, add_to_work_list(L, Sets)); -update_work_list([{call_only,_,{f,L}}|Is], Sets) -> - update_work_list(Is, add_to_work_list(L, Sets)); update_work_list([{make_fun2,{f,L},_,_,_}|Is], Sets) -> update_work_list(Is, add_to_work_list(L, Sets)); update_work_list([_|Is], Sets) -> @@ -200,7 +196,7 @@ replace([{test,Test,{f,Lbl},Ops}|Is], Acc, D) -> replace(Is, [{test,Test,{f,label(Lbl, D)},Ops}|Acc], D); replace([{test,Test,{f,Lbl},Live,Ops,Dst}|Is], Acc, D) -> replace(Is, [{test,Test,{f,label(Lbl, D)},Live,Ops,Dst}|Acc], D); -replace([{select_val,R,{f,Fail0},{list,Vls0}}|Is], Acc, D) -> +replace([{select,I,R,{f,Fail0},Vls0}|Is], Acc, D) -> Vls1 = map(fun ({f,L}) -> {f,label(L, D)}; (Other) -> Other end, Vls0), Fail = label(Fail0, D), @@ -210,12 +206,8 @@ replace([{select_val,R,{f,Fail0},{list,Vls0}}|Is], Acc, D) -> %% Convert to a plain jump. replace(Is, [{jump,{f,Fail}}|Acc], D); Vls -> - replace(Is, [{select_val,R,{f,Fail},{list,Vls}}|Acc], D) + replace(Is, [{select,I,R,{f,Fail},Vls}|Acc], D) end; -replace([{select_tuple_arity,R,{f,Fail},{list,Vls0}}|Is], Acc, D) -> - Vls = map(fun ({f,L}) -> {f,label(L, D)}; - (Other) -> Other end, Vls0), - replace(Is, [{select_tuple_arity,R,{f,label(Fail, D)},{list,Vls}}|Acc], D); replace([{'try',R,{f,Lbl}}|Is], Acc, D) -> replace(Is, [{'try',R,{f,label(Lbl, D)}}|Acc], D); replace([{'catch',R,{f,Lbl}}|Is], Acc, D) -> @@ -236,37 +228,12 @@ replace([{gc_bif,Name,{f,Lbl},Live,As,R}|Is], Acc, D) when Lbl =/= 0 -> replace(Is, [{gc_bif,Name,{f,label(Lbl, D)},Live,As,R}|Acc], D); replace([{call,Ar,{f,Lbl}}|Is], Acc, D) -> replace(Is, [{call,Ar,{f,label(Lbl,D)}}|Acc], D); -replace([{call_last,Ar,{f,Lbl},N}|Is], Acc, D) -> - replace(Is, [{call_last,Ar,{f,label(Lbl,D)},N}|Acc], D); -replace([{call_only,Ar,{f,Lbl}}|Is], Acc, D) -> - replace(Is, [{call_only,Ar,{f,label(Lbl, D)}}|Acc], D); replace([{make_fun2,{f,Lbl},U1,U2,U3}|Is], Acc, D) -> replace(Is, [{make_fun2,{f,label(Lbl, D)},U1,U2,U3}|Acc], D); -replace([{bs_init2,{f,Lbl},Sz,Words,R,F,Dst}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{bs_init2,{f,label(Lbl, D)},Sz,Words,R,F,Dst}|Acc], D); -replace([{bs_init_bits,{f,Lbl},Sz,Words,R,F,Dst}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{bs_init_bits,{f,label(Lbl, D)},Sz,Words,R,F,Dst}|Acc], D); -replace([{bs_put_integer,{f,Lbl},Bits,Unit,Fl,Val}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{bs_put_integer,{f,label(Lbl, D)},Bits,Unit,Fl,Val}|Acc], D); -replace([{bs_put_utf8=I,{f,Lbl},Fl,Val}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{I,{f,label(Lbl, D)},Fl,Val}|Acc], D); -replace([{bs_put_utf16=I,{f,Lbl},Fl,Val}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{I,{f,label(Lbl, D)},Fl,Val}|Acc], D); -replace([{bs_put_utf32=I,{f,Lbl},Fl,Val}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{I,{f,label(Lbl, D)},Fl,Val}|Acc], D); -replace([{bs_put_binary,{f,Lbl},Bits,Unit,Fl,Val}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{bs_put_binary,{f,label(Lbl, D)},Bits,Unit,Fl,Val}|Acc], D); -replace([{bs_put_float,{f,Lbl},Bits,Unit,Fl,Val}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{bs_put_float,{f,label(Lbl, D)},Bits,Unit,Fl,Val}|Acc], D); -replace([{bs_add,{f,Lbl},Src,Dst}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{bs_add,{f,label(Lbl, D)},Src,Dst}|Acc], D); -replace([{bs_append,{f,Lbl},_,_,_,_,_,_,_}=I0|Is], Acc, D) when Lbl =/= 0 -> - I = setelement(2, I0, {f,label(Lbl, D)}), - replace(Is, [I|Acc], D); -replace([{bs_utf8_size=I,{f,Lbl},Src,Dst}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{I,{f,label(Lbl, D)},Src,Dst}|Acc], D); -replace([{bs_utf16_size=I,{f,Lbl},Src,Dst}|Is], Acc, D) when Lbl =/= 0 -> - replace(Is, [{I,{f,label(Lbl, D)},Src,Dst}|Acc], D); +replace([{bs_init,{f,Lbl},Info,Live,Ss,Dst}|Is], Acc, D) when Lbl =/= 0 -> + replace(Is, [{bs_init,{f,label(Lbl, D)},Info,Live,Ss,Dst}|Acc], D); +replace([{bs_put,{f,Lbl},Info,Ss}|Is], Acc, D) when Lbl =/= 0 -> + replace(Is, [{bs_put,{f,label(Lbl, D)},Info,Ss}|Acc], D); replace([I|Is], Acc, D) -> replace(Is, [I|Acc], D); replace([], Acc, _) -> Acc. diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl index 5f12a98f09..92d8e5acb3 100644 --- a/lib/compiler/src/beam_dead.erl +++ b/lib/compiler/src/beam_dead.erl @@ -182,7 +182,7 @@ forward(Is, Lc) -> forward([{block,[]}|Is], D, Lc, Acc) -> %% Empty blocks can prevent optimizations. forward(Is, D, Lc, Acc); -forward([{select_val,Reg,_,{list,List}}=I|Is], D0, Lc, Acc) -> +forward([{select,select_val,Reg,_,List}=I|Is], D0, Lc, Acc) -> D = update_value_dict(List, Reg, D0), forward(Is, D, Lc, [I|Acc]); forward([{label,Lbl}=LblI,{block,[{set,[Dst],[Lit],move}|BlkIs]}=Blk|Is], D, Lc, Acc) -> @@ -271,11 +271,11 @@ backward([{test,is_eq_exact,Fail,[Dst,{integer,Arity}]}=I| end; backward([{label,Lbl}=L|Is], D, Acc) -> backward(Is, beam_utils:index_label(Lbl, Acc, D), [L|Acc]); -backward([{select_val,Reg,{f,Fail0},{list,List0}}|Is], D, Acc) -> +backward([{select,select_val,Reg,{f,Fail0},List0}|Is], D, Acc) -> List = shortcut_select_list(List0, Reg, D, []), Fail1 = shortcut_label(Fail0, D), Fail = shortcut_bs_test(Fail1, Is, D), - Sel = {select_val,Reg,{f,Fail},{list,List}}, + Sel = {select,select_val,Reg,{f,Fail},List}, backward(Is, D, [Sel|Acc]); backward([{jump,{f,To0}},{move,Src,Reg}=Move0|Is], D, Acc) -> {To,Move} = case Src of @@ -382,7 +382,7 @@ shortcut_select_label(To0, Reg, Val, D) -> case beam_utils:code_at(To0, D) of [{jump,{f,To}}|_] -> shortcut_select_label(To, Reg, Val, D); - [{test,is_atom,_,[Reg]},{select_val,Reg,{f,Fail},{list,Map}}|_] -> + [{test,is_atom,_,[Reg]},{select,select_val,Reg,{f,Fail},Map}|_] -> To = find_select_val(Map, Val, Fail), shortcut_select_label(To, Reg, Val, D); [{test,is_eq_exact,{f,_},[Reg,{atom,Val}]},{label,To}|_] when is_atom(Val) -> @@ -472,10 +472,10 @@ combine_eqs(To, [Reg,{Type,_}=Lit1]=Ops, D, [{label,L1}|_]) case beam_utils:code_at(To, D) of [{test,is_eq_exact,{f,F2},[Reg,{Type,_}=Lit2]}, {label,L2}|_] when Lit1 =/= Lit2 -> - {select_val,Reg,{f,F2},{list,[Lit1,{f,L1},Lit2,{f,L2}]}}; - [{select_val,Reg,{f,F2},{list,[{Type,_}|_]=List0}}|_] -> + {select,select_val,Reg,{f,F2},[Lit1,{f,L1},Lit2,{f,L2}]}; + [{select,select_val,Reg,{f,F2},[{Type,_}|_]=List0}|_] -> List = remove_from_list(Lit1, List0), - {select_val,Reg,{f,F2},{list,[Lit1,{f,L1}|List]}}; + {select,select_val,Reg,{f,F2},[Lit1,{f,L1}|List]}; _Is -> {test,is_eq_exact,{f,To},Ops} end; @@ -527,6 +527,8 @@ count_bits_matched([{test,_,_,_,[_,Sz,U,{field_flags,_}],_}|Is], SavePoint, Bits {integer,N} -> count_bits_matched(Is, SavePoint, Bits+N*U); _ -> count_bits_matched(Is, SavePoint, Bits) end; +count_bits_matched([{test,bs_match_string,_,[_,Bits,_]}|Is], SavePoint, Bits0) -> + count_bits_matched(Is, SavePoint, Bits0+Bits); count_bits_matched([{test,_,_,_}|Is], SavePoint, Bits) -> count_bits_matched(Is, SavePoint, Bits); count_bits_matched([{bs_save2,Reg,SavePoint}|_], {Reg,SavePoint}, Bits) -> diff --git a/lib/compiler/src/beam_except.erl b/lib/compiler/src/beam_except.erl index fb1a43cd9e..14e9943f88 100644 --- a/lib/compiler/src/beam_except.erl +++ b/lib/compiler/src/beam_except.erl @@ -65,10 +65,6 @@ function_1(Is0) -> translate([{call_ext,Ar,{extfunc,erlang,error,Ar}}=I|Is], St, Acc) -> translate_1(Ar, I, Is, St, Acc); -translate([{call_ext_only,Ar,{extfunc,erlang,error,Ar}}=I|Is], St, Acc) -> - translate_1(Ar, I, Is, St, Acc); -translate([{call_ext_last,Ar,{extfunc,erlang,error,Ar},_}=I|Is], St, Acc) -> - translate_1(Ar, I, Is, St, Acc); translate([I|Is], St, Acc) -> translate(Is, St, [I|Acc]); translate([], _, Acc) -> diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl index 6c7cb849aa..04232d8fd2 100644 --- a/lib/compiler/src/beam_flatten.erl +++ b/lib/compiler/src/beam_flatten.erl @@ -79,49 +79,28 @@ norm_allocate({nozero,Ns,Nh,Inits}, Regs) -> %% insert_alloc_in_bs_init(ReverseInstructionStream, AllocationInfo) -> %% impossible | ReverseInstructionStream' -%% A bs_init2/6 instruction should not be followed by a test heap instruction. +%% A bs_init/6 instruction should not be followed by a test heap instruction. %% Given the AllocationInfo from a test heap instruction, merge the -%% allocation amounts into the previous bs_init2/6 instruction (if any). +%% allocation amounts into the previous bs_init/6 instruction (if any). %% -insert_alloc_in_bs_init([I|_]=Is, Alloc) -> - case is_bs_constructor(I) of - false -> impossible; - true -> insert_alloc_1(Is, Alloc, []) - end. - -insert_alloc_1([{bs_init2=Op,Fail,Bs,Ws1,Regs,F,Dst}|Is], {_,nostack,Ws2,[]}, Acc) -> - Al = beam_utils:combine_heap_needs(Ws1, Ws2), - I = {Op,Fail,Bs,Al,Regs,F,Dst}, - reverse(Acc, [I|Is]); -insert_alloc_1([{bs_init_bits=Op,Fail,Bs,Ws1,Regs,F,Dst}|Is], {_,nostack,Ws2,[]}, Acc) -> - Al = beam_utils:combine_heap_needs(Ws1, Ws2), - I = {Op,Fail,Bs,Al,Regs,F,Dst}, - reverse(Acc, [I|Is]); -insert_alloc_1([{bs_append,Fail,Sz,Ws1,Regs,U,Bin,Fl,Dst}|Is], - {_,nostack,Ws2,[]}, Acc) -> +insert_alloc_in_bs_init([{bs_put,_,_,_}=I|Is], Alloc) -> + %% The instruction sequence ends with an bs_put/4 instruction. + %% We'll need to search backwards for the bs_init/6 instruction. + insert_alloc_1(Is, Alloc, [I]); +insert_alloc_in_bs_init(_, _) -> impossible. + +insert_alloc_1([{bs_init=Op,Fail,Info0,Live,Ss,Dst}|Is], + {_,nostack,Ws2,[]}, Acc) when is_integer(Live) -> + %% The number of extra heap words is always in the second position + %% in the Info tuple. + Ws1 = element(2, Info0), Al = beam_utils:combine_heap_needs(Ws1, Ws2), - I = {bs_append,Fail,Sz,Al,Regs,U,Bin,Fl,Dst}, + Info = setelement(2, Info0, Al), + I = {Op,Fail,Info,Live,Ss,Dst}, reverse(Acc, [I|Is]); -insert_alloc_1([I|Is], Alloc, Acc) -> +insert_alloc_1([{bs_put,_,_,_}=I|Is], Alloc, Acc) -> insert_alloc_1(Is, Alloc, [I|Acc]). - -%% is_bs_constructor(Instruction) -> true|false. -%% Test whether the instruction is a bit syntax construction -%% instruction that can occur at the end of a bit syntax -%% construction. (Since an empty binary would be expressed -%% as a literal, the bs_init2/6 instruction will not occur -%% at the end and therefore it is no need to test for it here.) -%% -is_bs_constructor({bs_put_integer,_,_,_,_,_}) -> true; -is_bs_constructor({bs_put_utf8,_,_,_}) -> true; -is_bs_constructor({bs_put_utf16,_,_,_}) -> true; -is_bs_constructor({bs_put_utf32,_,_,_}) -> true; -is_bs_constructor({bs_put_float,_,_,_,_,_}) -> true; -is_bs_constructor({bs_put_binary,_,_,_,_,_}) -> true; -is_bs_constructor({bs_put_string,_,_}) -> true; -is_bs_constructor(_) -> false. - %% opt(Is0) -> Is %% Simple peep-hole optimization to move a {move,Any,{x,0}} past %% any kill up to the next call instruction. (To give the loader diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl index db67d24514..b05d01b2a1 100644 --- a/lib/compiler/src/beam_jump.erl +++ b/lib/compiler/src/beam_jump.erl @@ -20,7 +20,7 @@ -module(beam_jump). --export([module/2,module_labels/1, +-export([module/2, is_unreachable_after/1,is_exit_instruction/1, remove_unused_labels/1,is_label_used_in/2]). @@ -46,10 +46,13 @@ %%% such as a jump that never transfers control to the instruction %%% following it. %%% -%%% (2) case_end, if_end, and badmatch, and function calls that cause an -%%% exit (such as calls to exit/1) are moved to the end of the function. -%%% The purpose is to allow further optimizations at the place from -%%% which the code was moved. +%%% (2) Short sequences starting with a label and ending in case_end, if_end, +%%% and badmatch, and function calls that cause an exit (such as calls +%%% to exit/1) are moved to the end of the function, but only if the +%%% the block is not entered via a fallthrough. The purpose of this move +%%% is to allow further optimizations at the place from which the +%%% code was moved (a jump around the block could be replaced with a +%%% fallthrough). %%% %%% (3) Any unreachable code is removed. Unreachable code is code %%% after jump, call_last and other instructions which never @@ -130,13 +133,6 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> Fs = [function(F) || F <- Fs0], {ok,{Mod,Exp,Attr,Fs,Lc}}. -module_labels({Mod,Exp,Attr,Fs,Lc}) -> - {Mod,Exp,Attr,[function_labels(F) || F <- Fs],Lc}. - -function_labels({function,Name,Arity,CLabel,Asm0}) -> - Asm = remove_unused_labels(Asm0), - {function,Name,Arity,CLabel,Asm}. - %% function(Function) -> Function' %% Optimize jumps and branches. %% @@ -232,6 +228,9 @@ extract_seq_1([{line,_}=Line|Is], Acc) -> extract_seq_1(Is, [Line|Acc]); extract_seq_1([{label,_},{func_info,_,_,_}|_], _) -> no; +extract_seq_1([{label,Lbl},{jump,{f,Lbl}}|_], _) -> + %% Don't move a sequence which have a fallthrough entering it. + no; extract_seq_1([{label,_}=Lbl|Is], Acc) -> {yes,[Lbl|Acc],Is}; extract_seq_1(_, _) -> no. @@ -260,43 +259,39 @@ find_fixpoint(OptFun, Is0) -> Is -> find_fixpoint(OptFun, Is) end. -opt([{test,Test0,{f,Lnum}=Lbl,Ops}=I|Is0], Acc, St) -> - case Is0 of - [{jump,{f,Lnum}}|Is] -> - %% We have - %% Test Label Ops - %% jump Label - %% The test instruction is definitely not needed. - %% The jump instruction is not needed if there is - %% a definition of Label following the jump instruction. - case is_label_defined(Is, Lnum) of - false -> - %% The jump instruction is still needed. - opt(Is0, [I|Acc], label_used(Lbl, St)); - true -> - %% Neither the test nor the jump are needed. - opt(Is, Acc, St) - end; - [{jump,To}|Is] -> - case is_label_defined(Is, Lnum) of - false -> +opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) -> + %% We have + %% Test Label Ops + %% jump Label + %% The test instruction is not needed if the test is pure + %% (it modifies neither registers nor bit syntax state). + case beam_utils:is_pure_test(I) of + false -> + %% Test is not pure; we must keep it. + opt(Is, [I|Acc], label_used(Lbl, St)); + true -> + %% The test is pure and its failure label is the same + %% as in the jump that follows -- thus it is not needed. + opt(Is, Acc, St) + end; +opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) -> + case is_label_defined(Is, L) of + false -> + opt(Is0, [I|Acc], label_used(Lbl, St)); + true -> + case invert_test(Test0) of + not_possible -> opt(Is0, [I|Acc], label_used(Lbl, St)); - true -> - case invert_test(Test0) of - not_possible -> - opt(Is0, [I|Acc], label_used(Lbl, St)); - Test -> - opt([{test,Test,To,Ops}|Is], Acc, St) - end - end; - _Other -> - opt(Is0, [I|Acc], label_used(Lbl, St)) + Test -> + %% Invert the test and remove the jump. + opt([{test,Test,To,Ops}|Is], Acc, St) + end end; +opt([{test,_,{f,_}=Lbl,_}=I|Is], Acc, St) -> + opt(Is, [I|Acc], label_used(Lbl, St)); opt([{test,_,{f,_}=Lbl,_,_,_}=I|Is], Acc, St) -> opt(Is, [I|Acc], label_used(Lbl, St)); -opt([{select_val,_R,Fail,{list,Vls}}=I|Is], Acc, St) -> - skip_unreachable(Is, [I|Acc], label_used([Fail|Vls], St)); -opt([{select_tuple_arity,_R,Fail,{list,Vls}}=I|Is], Acc, St) -> +opt([{select,_,_R,Fail,Vls}=I|Is], Acc, St) -> skip_unreachable(Is, [I|Acc], label_used([Fail|Vls], St)); opt([{label,L}=I|Is], Acc, #st{entry=L}=St) -> %% NEVER move the entry label. @@ -412,14 +407,8 @@ is_label_used(L, St) -> is_unreachable_after({func_info,_M,_F,_A}) -> true; is_unreachable_after(return) -> true; -is_unreachable_after({call_ext_last,_Ar,_ExtFunc,_D}) -> true; -is_unreachable_after({call_ext_only,_Ar,_ExtFunc}) -> true; -is_unreachable_after({call_last,_Ar,_Lbl,_D}) -> true; -is_unreachable_after({call_only,_Ar,_Lbl}) -> true; -is_unreachable_after({apply_last,_Ar,_N}) -> true; is_unreachable_after({jump,_Lbl}) -> true; -is_unreachable_after({select_val,_R,_Lbl,_Cases}) -> true; -is_unreachable_after({select_tuple_arity,_R,_Lbl,_Cases}) -> true; +is_unreachable_after({select,_What,_R,_Lbl,_Cases}) -> true; is_unreachable_after({loop_rec_end,_}) -> true; is_unreachable_after({wait,_}) -> true; is_unreachable_after(I) -> is_exit_instruction(I). @@ -430,10 +419,6 @@ is_unreachable_after(I) -> is_exit_instruction(I). is_exit_instruction({call_ext,_,{extfunc,M,F,A}}) -> erl_bifs:is_exit_bif(M, F, A); -is_exit_instruction({call_ext_last,_,{extfunc,M,F,A},_}) -> - erl_bifs:is_exit_bif(M, F, A); -is_exit_instruction({call_ext_only,_,{extfunc,M,F,A}}) -> - erl_bifs:is_exit_bif(M, F, A); is_exit_instruction(if_end) -> true; is_exit_instruction({case_end,_}) -> true; is_exit_instruction({try_case_end,_}) -> true; @@ -516,9 +501,7 @@ ulbl({test,_,Fail,_}, Used) -> mark_used(Fail, Used); ulbl({test,_,Fail,_,_,_}, Used) -> mark_used(Fail, Used); -ulbl({select_val,_,Fail,{list,Vls}}, Used) -> - mark_used_list(Vls, mark_used(Fail, Used)); -ulbl({select_tuple_arity,_,Fail,{list,Vls}}, Used) -> +ulbl({select,_,_,Fail,Vls}, Used) -> mark_used_list(Vls, mark_used(Fail, Used)); ulbl({'try',_,Lbl}, Used) -> mark_used(Lbl, Used); @@ -538,29 +521,9 @@ ulbl({bif,_Name,Lbl,_As,_R}, Used) -> mark_used(Lbl, Used); ulbl({gc_bif,_Name,Lbl,_Live,_As,_R}, Used) -> mark_used(Lbl, Used); -ulbl({bs_init2,Lbl,_,_,_,_,_}, Used) -> - mark_used(Lbl, Used); -ulbl({bs_init_bits,Lbl,_,_,_,_,_}, Used) -> - mark_used(Lbl, Used); -ulbl({bs_put_integer,Lbl,_Bits,_Unit,_Fl,_Val}, Used) -> - mark_used(Lbl, Used); -ulbl({bs_put_float,Lbl,_Bits,_Unit,_Fl,_Val}, Used) -> - mark_used(Lbl, Used); -ulbl({bs_put_binary,Lbl,_Bits,_Unit,_Fl,_Val}, Used) -> - mark_used(Lbl, Used); -ulbl({bs_put_utf8,Lbl,_Fl,_Val}, Used) -> - mark_used(Lbl, Used); -ulbl({bs_put_utf16,Lbl,_Fl,_Val}, Used) -> - mark_used(Lbl, Used); -ulbl({bs_put_utf32,Lbl,_Fl,_Val}, Used) -> - mark_used(Lbl, Used); -ulbl({bs_add,Lbl,_,_}, Used) -> - mark_used(Lbl, Used); -ulbl({bs_append,Lbl,_,_,_,_,_,_,_}, Used) -> - mark_used(Lbl, Used); -ulbl({bs_utf8_size,Lbl,_,_}, Used) -> +ulbl({bs_init,Lbl,_,_,_,_}, Used) -> mark_used(Lbl, Used); -ulbl({bs_utf16_size,Lbl,_,_}, Used) -> +ulbl({bs_put,Lbl,_,_}, Used) -> mark_used(Lbl, Used); ulbl(_, Used) -> Used. diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl index f39fc50b95..a199aa50ed 100644 --- a/lib/compiler/src/beam_peep.erl +++ b/lib/compiler/src/beam_peep.erl @@ -120,13 +120,13 @@ peep([{test,Op,_,Ops}=I|Is], SeenTests0, Acc) -> peep(Is, SeenTests, [I|Acc]) end end; -peep([{select_val,Src,Fail, - {list,[{atom,false},{f,L},{atom,true},{f,L}]}}| +peep([{select,select_val,Src,Fail, + [{atom,false},{f,L},{atom,true},{f,L}]}| [{label,L}|_]=Is], SeenTests, Acc) -> I = {test,is_boolean,Fail,[Src]}, peep([I|Is], SeenTests, Acc); -peep([{select_val,Src,Fail, - {list,[{atom,true},{f,L},{atom,false},{f,L}]}}| +peep([{select,select_val,Src,Fail, + [{atom,true},{f,L},{atom,false},{f,L}]}| [{label,L}|_]=Is], SeenTests, Acc) -> I = {test,is_boolean,Fail,[Src]}, peep([I|Is], SeenTests, Acc); diff --git a/lib/compiler/src/beam_receive.erl b/lib/compiler/src/beam_receive.erl index bd1f44f66b..fe95a7e35b 100644 --- a/lib/compiler/src/beam_receive.erl +++ b/lib/compiler/src/beam_receive.erl @@ -84,13 +84,29 @@ function({function,Name,Arity,Entry,Is}) -> erlang:raise(Class, Error, Stack) end. +opt([{call_ext,A,{extfunc,erlang,spawn_monitor,A}}=I0|Is0], D, Acc) + when A =:= 1; A =:= 3 -> + case ref_in_tuple(Is0) of + no -> + opt(Is0, D, [I0|Acc]); + {yes,Regs,Is1,MatchReversed} -> + %% The call creates a brand new reference. Now + %% search for a receive statement in the same + %% function that will match against the reference. + case opt_recv(Is1, Regs, D) of + no -> + opt(Is0, D, [I0|Acc]); + {yes,Is,Lbl} -> + opt(Is, D, MatchReversed++[I0,{recv_mark,{f,Lbl}}|Acc]) + end + end; opt([{call_ext,Arity,{extfunc,erlang,Name,Arity}}=I|Is0], D, Acc) -> case creates_new_ref(Name, Arity) of true -> %% The call creates a brand new reference. Now %% search for a receive statement in the same %% function that will match against the reference. - case opt_recv(Is0, D) of + case opt_recv(Is0, regs_init_x0(), D) of no -> opt(Is0, D, [I|Acc]); {yes,Is,Lbl} -> @@ -104,19 +120,34 @@ opt([I|Is], D, Acc) -> opt([], _, Acc) -> reverse(Acc). +ref_in_tuple([{test,is_tuple,_,[{x,0}]}=I1, + {test,test_arity,_,[{x,0},2]}=I2, + {block,[{set,[_],[{x,0}],{get_tuple_element,0}}, + {set,[Dst],[{x,0}],{get_tuple_element,1}}|Bl]}=I3|Is]) -> + ref_in_tuple_1(Bl, Dst, Is, [I3,I2,I1]); +ref_in_tuple([{test,is_tuple,_,[{x,0}]}=I1, + {test,test_arity,_,[{x,0},2]}=I2, + {block,[{set,[Dst],[{x,0}],{get_tuple_element,1}}|Bl]}=I3|Is]) -> + ref_in_tuple_1(Bl, Dst, Is, [I3,I2,I1]); +ref_in_tuple(_) -> no. + +ref_in_tuple_1(Bl, Dst, Is, MatchReversed) -> + Regs0 = regs_init_singleton(Dst), + Regs = opt_update_regs_bl(Bl, Regs0), + {yes,Regs,Is,MatchReversed}. + %% creates_new_ref(Name, Arity) -> true|false. %% Return 'true' if the BIF Name/Arity will create a new reference. creates_new_ref(monitor, 2) -> true; creates_new_ref(make_ref, 0) -> true; creates_new_ref(_, _) -> false. -%% opt_recv([Instruction], LabelIndex) -> no|{yes,[Instruction]} +%% opt_recv([Instruction], Regs, LabelIndex) -> no|{yes,[Instruction]} %% Search for a receive statement that will only retrieve messages %% that contain the newly created reference (which is currently in {x,0}). -opt_recv(Is, D) -> - R = regs_init_x0(), +opt_recv(Is, Regs, D) -> L = gb_sets:empty(), - opt_recv(Is, D, R, L, []). + opt_recv(Is, D, Regs, L, []). opt_recv([{label,L}=Lbl,{loop_rec,{f,Fail},_}=Loop|Is], D, R0, _, Acc) -> R = regs_kill_not_live(0, R0), @@ -157,8 +188,6 @@ opt_update_regs({call_fun,_}, R, L) -> {regs_kill_not_live(0, R),L}; opt_update_regs({kill,Y}, R, L) -> {regs_kill([Y], R),L}; -opt_update_regs(send, R, L) -> - {regs_kill_not_live(0, R),L}; opt_update_regs({'catch',_,{f,Lbl}}, R, L) -> {R,gb_sets:add(Lbl, L)}; opt_update_regs({catch_end,_}, R, L) -> @@ -253,10 +282,7 @@ opt_ref_used_1([{test,is_ne_exact,{f,Fail},Args}|Is], RefReg, D, Done0, Regs) -> opt_ref_used_1([{test,_,{f,Fail},_}|Is], RefReg, D, Done0, Regs) -> Done = opt_ref_used_at(Fail, RefReg, D, Done0, Regs), opt_ref_used_1(Is, RefReg, D, Done, Regs); -opt_ref_used_1([{select_tuple_arity,_,{f,Fail},{list,List}}|_], RefReg, D, Done, Regs) -> - Lbls = [F || {f,F} <- List] ++ [Fail], - opt_ref_used_in_all(Lbls, RefReg, D, Done, Regs); -opt_ref_used_1([{select_val,_,{f,Fail},{list,List}}|_], RefReg, D, Done, Regs) -> +opt_ref_used_1([{select,_,_,{f,Fail},List}|_], RefReg, D, Done, Regs) -> Lbls = [F || {f,F} <- List] ++ [Fail], opt_ref_used_in_all(Lbls, RefReg, D, Done, Regs); opt_ref_used_1([{label,Lbl}|Is], RefReg, D, Done, Regs) -> @@ -323,6 +349,12 @@ opt_ref_used_bl([], Regs) -> Regs. regs_init() -> {0,0}. +%% regs_init_singleton(Register) -> RegisterSet +%% Return a set that only contains one register. + +regs_init_singleton(Reg) -> + regs_add(Reg, regs_init()). + %% regs_init_x0() -> RegisterSet %% Return a set that only contains the {x,0} register. diff --git a/lib/compiler/src/beam_trim.erl b/lib/compiler/src/beam_trim.erl index 5f4fa3b1f8..d95db1f681 100644 --- a/lib/compiler/src/beam_trim.erl +++ b/lib/compiler/src/beam_trim.erl @@ -172,38 +172,16 @@ remap([{bif,Name,Fail,Ss,D}|Is], Map, Acc) -> remap([{gc_bif,Name,Fail,Live,Ss,D}|Is], Map, Acc) -> I = {gc_bif,Name,Fail,Live,[Map(S) || S <- Ss],Map(D)}, remap(Is, Map, [I|Acc]); -remap([{bs_add,Fail,[SrcA,SrcB,U],D}|Is], Map, Acc) -> - I = {bs_add,Fail,[Map(SrcA),Map(SrcB),U],Map(D)}, +remap([{bs_init,Fail,Info,Live,Ss0,Dst0}|Is], Map, Acc) -> + Ss = [Map(Src) || Src <- Ss0], + Dst = Map(Dst0), + I = {bs_init,Fail,Info,Live,Ss,Dst}, remap(Is, Map, [I|Acc]); -remap([{bs_append=Op,Fail,Bits,Heap,Live,Unit,Bin,Flags,D}|Is], Map, Acc) -> - I = {Op,Fail,Map(Bits),Heap,Live,Unit,Map(Bin),Flags,Map(D)}, - remap(Is, Map, [I|Acc]); -remap([{bs_private_append=Op,Fail,Bits,Unit,Bin,Flags,D}|Is], Map, Acc) -> - I = {Op,Fail,Map(Bits),Unit,Map(Bin),Flags,Map(D)}, - remap(Is, Map, [I|Acc]); -remap([bs_init_writable=I|Is], Map, Acc) -> - remap(Is, Map, [I|Acc]); -remap([{bs_init2,Fail,Src,Live,U,Flags,D}|Is], Map, Acc) -> - I = {bs_init2,Fail,Map(Src),Live,U,Flags,Map(D)}, - remap(Is, Map, [I|Acc]); -remap([{bs_init_bits,Fail,Src,Live,U,Flags,D}|Is], Map, Acc) -> - I = {bs_init_bits,Fail,Map(Src),Live,U,Flags,Map(D)}, - remap(Is, Map, [I|Acc]); -remap([{bs_put_binary=Op,Fail,Src,U,Flags,D}|Is], Map, Acc) -> - I = {Op,Fail,Map(Src),U,Flags,Map(D)}, - remap(Is, Map, [I|Acc]); -remap([{bs_put_integer=Op,Fail,Src,U,Flags,D}|Is], Map, Acc) -> - I = {Op,Fail,Map(Src),U,Flags,Map(D)}, - remap(Is, Map, [I|Acc]); -remap([{bs_put_float=Op,Fail,Src,U,Flags,D}|Is], Map, Acc) -> - I = {Op,Fail,Map(Src),U,Flags,Map(D)}, - remap(Is, Map, [I|Acc]); -remap([{bs_put_string,_,_}=I|Is], Map, Acc) -> +remap([{bs_put=Op,Fail,Info,Ss}|Is], Map, Acc) -> + I = {Op,Fail,Info,[Map(S) || S <- Ss]}, remap(Is, Map, [I|Acc]); remap([{kill,Y}|T], Map, Acc) -> remap(T, Map, [{kill,Map(Y)}|Acc]); -remap([send=I|T], Map, Acc) -> - remap(T, Map, [I|Acc]); remap([{make_fun2,_,_,_,_}=I|T], Map, Acc) -> remap(T, Map, [I|Acc]); remap([{deallocate,N}|Is], Map, Acc) -> @@ -217,12 +195,6 @@ remap([{test,Name,Fail,Live,Ss,Dst}|Is], Map, Acc) -> remap(Is, Map, [I|Acc]); remap([return|_]=Is, _, Acc) -> reverse(Acc, Is); -remap([{call_last,Ar,Name,N}|Is], Map, Acc) -> - I = {call_last,Ar,Name,Map({frame_size,N})}, - reverse(Acc, [I|Is]); -remap([{call_ext_last,Ar,Name,N}|Is], Map, Acc) -> - I = {call_ext_last,Ar,Name,Map({frame_size,N})}, - reverse(Acc, [I|Is]); remap([{line,_}=I|Is], Map, Acc) -> remap(Is, Map, [I|Acc]). @@ -280,8 +252,8 @@ frame_size([{call_fun,_}|Is], Safe) -> frame_size(Is, Safe); frame_size([{call,_,_}|Is], Safe) -> frame_size(Is, Safe); -frame_size([{call_ext,A,{extfunc,M,F,A}}|Is], Safe) -> - case erl_bifs:is_exit_bif(M, F, A) of +frame_size([{call_ext,_,_}=I|Is], Safe) -> + case beam_jump:is_exit_instruction(I) of true -> throw(not_possible); false -> frame_size(Is, Safe) end; @@ -295,35 +267,15 @@ frame_size([{test,_,{f,L},_}|Is], Safe) -> frame_size_branch(L, Is, Safe); frame_size([{test,_,{f,L},_,_,_}|Is], Safe) -> frame_size_branch(L, Is, Safe); -frame_size([{bs_add,{f,L},_,_}|Is], Safe) -> +frame_size([{bs_init,{f,L},_,_,_,_}|Is], Safe) -> frame_size_branch(L, Is, Safe); -frame_size([{bs_append,{f,L},_,_,_,_,_,_,_}|Is], Safe) -> +frame_size([{bs_put,{f,L},_,_}|Is], Safe) -> frame_size_branch(L, Is, Safe); -frame_size([{bs_private_append,{f,L},_,_,_,_,_}|Is], Safe) -> - frame_size_branch(L, Is, Safe); -frame_size([bs_init_writable|Is], Safe) -> - frame_size(Is, Safe); -frame_size([{bs_init2,{f,L},_,_,_,_,_}|Is], Safe) -> - frame_size_branch(L, Is, Safe); -frame_size([{bs_init_bits,{f,L},_,_,_,_,_}|Is], Safe) -> - frame_size_branch(L, Is, Safe); -frame_size([{bs_put_binary,{f,L},_,_,_,_}|Is], Safe) -> - frame_size_branch(L, Is, Safe); -frame_size([{bs_put_integer,{f,L},_,_,_,_}|Is], Safe) -> - frame_size_branch(L, Is, Safe); -frame_size([{bs_put_float,{f,L},_,_,_,_}|Is], Safe) -> - frame_size_branch(L, Is, Safe); -frame_size([{bs_put_string,_,_}|Is], Safe) -> - frame_size(Is, Safe); frame_size([{kill,_}|Is], Safe) -> frame_size(Is, Safe); -frame_size([send|Is], Safe) -> - frame_size(Is, Safe); frame_size([{make_fun2,_,_,_,_}|Is], Safe) -> frame_size(Is, Safe); frame_size([{deallocate,N}|_], _) -> N; -frame_size([{call_last,_,_,N}|_], _) -> N; -frame_size([{call_ext_last,_,_,N}|_], _) -> N; frame_size([{line,_}|Is], Safe) -> frame_size(Is, Safe); frame_size([_|_], _) -> throw(not_possible). diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index 194f089ba1..8b661e6901 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -87,7 +87,7 @@ is_killed_at(R, Lbl, D) when is_integer(Lbl) -> %% across branches. is_not_used(R, Is, D) -> - St = #live{bl=fun check_used_block/2,lbl=D,res=gb_trees:empty()}, + St = #live{bl=check_used_block_fun(D),lbl=D,res=gb_trees:empty()}, case check_liveness(R, Is, St) of {killed,_} -> true; {used,_} -> false; @@ -102,7 +102,7 @@ is_not_used(R, Is, D) -> %% across branches. is_not_used_at(R, Lbl, D) -> - St = #live{bl=fun check_used_block/2,lbl=D,res=gb_trees:empty()}, + St = #live{bl=check_used_block_fun(D),lbl=D,res=gb_trees:empty()}, case check_liveness_at(R, Lbl, St) of {killed,_} -> true; {used,_} -> false; @@ -276,13 +276,9 @@ check_liveness(R, [{test,_,{f,Fail},Live,Ss,_}|Is], St0) -> {_,_}=Other -> Other end end; -check_liveness(R, [{select_val,R,_,_}|_], St) -> +check_liveness(R, [{select,_,R,_,_}|_], St) -> {used,St}; -check_liveness(R, [{select_val,_,Fail,{list,Branches}}|_], St) -> - check_liveness_everywhere(R, [Fail|Branches], St); -check_liveness(R, [{select_tuple_arity,R,_,_}|_], St) -> - {used,St}; -check_liveness(R, [{select_tuple_arity,_,Fail,{list,Branches}}|_], St) -> +check_liveness(R, [{select,_,_,Fail,Branches}|_], St) -> check_liveness_everywhere(R, [Fail|Branches], St); check_liveness(R, [{jump,{f,F}}|_], St) -> check_liveness_at(R, F, St); @@ -301,37 +297,33 @@ check_liveness(R, [{kill,R}|_], St) -> {killed,St}; check_liveness(R, [{kill,_}|Is], St) -> check_liveness(R, Is, St); -check_liveness(R, [bs_init_writable|Is], St) -> - if - R =:= {x,0} -> {used,St}; - true -> check_liveness(R, Is, St) - end; -check_liveness(R, [{bs_private_append,_,Bits,_,Bin,_,Dst}|Is], St) -> - case R of - Bits -> {used,St}; - Bin -> {used,St}; - Dst -> {killed,St}; - _ -> check_liveness(R, Is, St) +check_liveness(R, [{bs_init,_,_,none,Ss,Dst}|Is], St) -> + case member(R, Ss) of + true -> + {used,St}; + false -> + if + R =:= Dst -> {killed,St}; + true -> check_liveness(R, Is, St) + end end; -check_liveness(R, [{bs_append,_,Bits,_,_,_,Bin,_,Dst}|Is], St) -> +check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) -> case R of - Bits -> {used,St}; - Bin -> {used,St}; - Dst -> {killed,St}; - _ -> check_liveness(R, Is, St) - end; -check_liveness(R, [{bs_init2,_,_,_,_,_,Dst}|Is], St) -> - if - R =:= Dst -> {killed,St}; - true -> check_liveness(R, Is, St) - end; -check_liveness(R, [{bs_init_bits,_,_,_,_,_,Dst}|Is], St) -> - if - R =:= Dst -> {killed,St}; - true -> check_liveness(R, Is, St) + {x,X} -> + case X < Live orelse member(R, Ss) of + true -> {used,St}; + false -> {killed,St} + end; + {y,_} -> + case member(R, Ss) of + true -> {used,St}; + false -> + if + R =:= Dst -> {killed,St}; + true -> check_liveness(R, Is, St) + end + end end; -check_liveness(R, [{bs_put_string,_,_}|Is], St) -> - check_liveness(R, Is, St); check_liveness(R, [{deallocate,_}|Is], St) -> case R of {y,_} -> {killed,St}; @@ -339,29 +331,20 @@ check_liveness(R, [{deallocate,_}|Is], St) -> end; check_liveness(R, [return|_], St) -> check_liveness_live_ret(R, 1, St); -check_liveness(R, [{call_last,Live,_,_}|_], St) -> - check_liveness_live_ret(R, Live, St); -check_liveness(R, [{call_only,Live,_}|_], St) -> - check_liveness_live_ret(R, Live, St); -check_liveness(R, [{call_ext_last,Live,_,_}|_], St) -> - check_liveness_live_ret(R, Live, St); -check_liveness(R, [{call_ext_only,Live,_}|_], St) -> - check_liveness_live_ret(R, Live, St); check_liveness(R, [{call,Live,_}|Is], St) -> case R of {x,X} when X < Live -> {used,St}; {x,_} -> {killed,St}; {y,_} -> check_liveness(R, Is, St) end; -check_liveness(R, [{call_ext,Live,Func}|Is], St) -> +check_liveness(R, [{call_ext,Live,_}=I|Is], St) -> case R of {x,X} when X < Live -> {used,St}; {x,_} -> {killed,St}; {y,_} -> - {extfunc,Mod,Name,Arity} = Func, - case erl_bifs:is_exit_bif(Mod, Name, Arity) of + case beam_jump:is_exit_instruction(I) of false -> check_liveness(R, Is, St); true -> @@ -387,14 +370,6 @@ check_liveness(R, [{apply,Args}|Is], St) -> {x,_} -> {killed,St}; {y,_} -> check_liveness(R, Is, St) end; -check_liveness(R, [{apply_last,Args,_}|_], St) -> - check_liveness_live_ret(R, Args+2, St); -check_liveness(R, [send|Is], St) -> - case R of - {x,X} when X < 2 -> {used,St}; - {x,_} -> {killed,St}; - {y,_} -> check_liveness(R, Is, St) - end; check_liveness({x,R}, [{'%live',Live}|Is], St) -> if R < Live -> check_liveness(R, Is, St); @@ -429,25 +404,9 @@ check_liveness(R, [{gc_bif,Op,{f,Fail},Live,Ss,D}|Is], St0) -> Other end end; -check_liveness(R, [{bs_add,{f,0},Ss,D}|Is], St) -> +check_liveness(R, [{bs_put,{f,0},_,Ss}|Is], St) -> case member(R, Ss) of true -> {used,St}; - false when R =:= D -> {killed,St}; - false -> check_liveness(R, Is, St) - end; -check_liveness(R, [{bs_put_binary,{f,0},Sz,_,_,Src}|Is], St) -> - case member(R, [Sz,Src]) of - true -> {used,St}; - false -> check_liveness(R, Is, St) - end; -check_liveness(R, [{bs_put_integer,{f,0},Sz,_,_,Src}|Is], St) -> - case member(R, [Sz,Src]) of - true -> {used,St}; - false -> check_liveness(R, Is, St) - end; -check_liveness(R, [{bs_put_float,{f,0},Sz,_,_,Src}|Is], St) -> - case member(R, [Sz,Src]) of - true -> {used,St}; false -> check_liveness(R, Is, St) end; check_liveness(R, [{bs_restore2,S,_}|Is], St) -> @@ -472,6 +431,16 @@ check_liveness(R, [{make_fun2,_,_,_,NumFree}|Is], St) -> {x,_} -> {killed,St}; _ -> check_liveness(R, Is, St) end; +check_liveness({x,_}=R, [{'catch',_,_}|Is], St) -> + %% All x registers will be killed if an exception occurs. + %% Therefore we only need to check the liveness for the + %% instructions following the catch instruction. + check_liveness(R, Is, St); +check_liveness({x,_}=R, [{'try',_,_}|Is], St) -> + %% All x registers will be killed if an exception occurs. + %% Therefore we only need to check the liveness for the + %% instructions inside the 'try' block. + check_liveness(R, Is, St); check_liveness(R, [{try_end,Y}|Is], St) -> case R of Y -> @@ -602,26 +571,50 @@ check_killed_block(_, []) -> transparent. %% %% (Unknown instructions will cause an exception.) -check_used_block({x,X}=R, [{set,_,_,{alloc,Live,_}}|Is]) -> +check_used_block_fun(D) -> + fun(R, Is) -> check_used_block(R, Is, D) end. + +check_used_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], D) -> if X >= Live -> killed; - true -> check_used_block(R, Is) + true -> + case member(R, Ss) orelse + is_reg_used_at(R, Op, D) of + true -> used; + false -> + case member(R, Ds) of + true -> killed; + false -> check_used_block(R, Is, D) + end + end end; -check_used_block(R, [{set,Ds,Ss,_Op}|Is]) -> - case member(R, Ss) of +check_used_block(R, [{set,Ds,Ss,Op}|Is], D) -> + case member(R, Ss) orelse + is_reg_used_at(R, Op, D) of true -> used; false -> case member(R, Ds) of true -> killed; - false -> check_used_block(R, Is) + false -> check_used_block(R, Is, D) end end; -check_used_block(R, [{'%live',Live}|Is]) -> +check_used_block(R, [{'%live',Live}|Is], D) -> case R of {x,X} when X >= Live -> killed; - _ -> check_used_block(R, Is) + _ -> check_used_block(R, Is, D) end; -check_used_block(_, []) -> transparent. +check_used_block(_, [], _) -> transparent. + +is_reg_used_at(R, {gc_bif,_,{f,Lbl}}, D) -> + is_reg_used_at_1(R, Lbl, D); +is_reg_used_at(R, {bif,_,{f,Lbl}}, D) -> + is_reg_used_at_1(R, Lbl, D); +is_reg_used_at(_, _, _) -> false. + +is_reg_used_at_1(_, 0, _) -> + false; +is_reg_used_at_1(R, Lbl, D) -> + not is_not_used_at(R, Lbl, D). index_labels_1([{label,Lbl}|Is0], Acc) -> Is = lists:dropwhile(fun({label,_}) -> true; @@ -654,49 +647,21 @@ combine_alloc_lists_1([]) -> []. live_opt([{bs_context_to_binary,Src}=I|Is], Regs0, D, Acc) -> Regs = x_live([Src], Regs0), live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_add,Fail,[Src1,Src2,_],Dst}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src1,Src2], x_dead([Dst], Regs0)), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_init2,Fail,_,_,Live,_,_}=I|Is], _, D, Acc) -> - Regs1 = live_call(Live), +live_opt([{bs_init,Fail,_,none,Ss,Dst}=I|Is], Regs0, D, Acc) -> + Regs1 = x_live(Ss, x_dead([Dst], Regs0)), Regs = live_join_label(Fail, D, Regs1), live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_init_bits,Fail,Src1,_,Live,_,Src2}=I|Is], _, D, Acc) -> - Regs1 = live_call(Live), - Regs2 = x_live([Src1,Src2], Regs1), - Regs = live_join_label(Fail, D, Regs2), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_append,Fail,Src1,_,Live,_,Src2,_,Dst}=I|Is], _Regs0, D, Acc) -> - Regs1 = x_dead([Dst], x_live([Src1,Src2], live_call(Live))), - Regs = live_join_label(Fail, D, Regs1), +live_opt([{bs_init,Fail,Info,Live0,Ss,Dst}|Is], Regs0, D, Acc) -> + Regs1 = x_dead([Dst], Regs0), + Live = live_regs(Regs1), + true = Live =< Live0, %Assertion. + Regs2 = live_call(Live), + Regs3 = x_live(Ss, Regs2), + Regs = live_join_label(Fail, D, Regs3), + I = {bs_init,Fail,Info,Live,Ss,Dst}, live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_private_append,Fail,Src1,_,Src2,_,Dst}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src1,Src2], x_dead([Dst], Regs0)), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_put_binary,Fail,Src1,_,_,Src2}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src1,Src2], Regs0), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_put_float,Fail,Src1,_,_,Src2}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src1,Src2], Regs0), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_put_integer,Fail,Src1,_,_,Src2}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src1,Src2], Regs0), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_put_utf8,Fail,_,Src}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src], Regs0), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_put_utf16,Fail,_,Src}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src], Regs0), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_put_utf32,Fail,_,Src}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src], Regs0), +live_opt([{bs_put,Fail,_,Ss}=I|Is], Regs0, D, Acc) -> + Regs1 = x_live(Ss, Regs0), Regs = live_join_label(Fail, D, Regs1), live_opt(Is, Regs, D, [I|Acc]); live_opt([{bs_restore2,Src,_}=I|Is], Regs0, D, Acc) -> @@ -705,14 +670,6 @@ live_opt([{bs_restore2,Src,_}=I|Is], Regs0, D, Acc) -> live_opt([{bs_save2,Src,_}=I|Is], Regs0, D, Acc) -> Regs = x_live([Src], Regs0), live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_utf8_size,Fail,Src,Dst}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src], x_dead([Dst], Regs0)), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{bs_utf16_size,Fail,Src,Dst}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src], x_dead([Dst], Regs0)), - Regs = live_join_label(Fail, D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); live_opt([{test,bs_start_match2,Fail,Live,[Src,_],_}=I|Is], _, D, Acc) -> Regs0 = live_call(Live), Regs1 = x_live([Src], Regs0), @@ -747,30 +704,16 @@ live_opt([{try_case_end,Src}=I|Is], _, D, Acc) -> live_opt([if_end=I|Is], _, D, Acc) -> Regs = 0, live_opt(Is, Regs, D, [I|Acc]); -live_opt([bs_init_writable=I|Is], _, D, Acc) -> - live_opt(Is, live_call(1), D, [I|Acc]); live_opt([{call,Arity,_}=I|Is], _, D, Acc) -> live_opt(Is, live_call(Arity), D, [I|Acc]); live_opt([{call_ext,Arity,_}=I|Is], _, D, Acc) -> live_opt(Is, live_call(Arity), D, [I|Acc]); live_opt([{call_fun,Arity}=I|Is], _, D, Acc) -> live_opt(Is, live_call(Arity+1), D, [I|Acc]); -live_opt([{call_last,Arity,_,_}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(Arity), D, [I|Acc]); -live_opt([{call_ext_last,Arity,_,_}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(Arity), D, [I|Acc]); live_opt([{apply,Arity}=I|Is], _, D, Acc) -> live_opt(Is, live_call(Arity+2), D, [I|Acc]); -live_opt([{apply_last,Arity,_}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(Arity+2), D, [I|Acc]); -live_opt([{call_only,Arity,_}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(Arity), D, [I|Acc]); -live_opt([{call_ext_only,Arity,_}=I|Is], _, D, Acc) -> - live_opt(Is, live_call(Arity), D, [I|Acc]); live_opt([{make_fun2,_,_,_,Arity}=I|Is], _, D, Acc) -> live_opt(Is, live_call(Arity), D, [I|Acc]); -live_opt([send=I|Is], _, D, Acc) -> - live_opt(Is, live_call(2), D, [I|Acc]); live_opt([{test,_,Fail,Ss}=I|Is], Regs0, D, Acc) -> Regs1 = x_live(Ss, Regs0), Regs = live_join_label(Fail, D, Regs1), @@ -780,16 +723,14 @@ live_opt([{test,_,Fail,Live,Ss,_}=I|Is], _, D, Acc) -> Regs1 = x_live(Ss, Regs0), Regs = live_join_label(Fail, D, Regs1), live_opt(Is, Regs, D, [I|Acc]); -live_opt([{select_val,Src,Fail,{list,List}}=I|Is], Regs0, D, Acc) -> +live_opt([{select,_,Src,Fail,List}=I|Is], Regs0, D, Acc) -> Regs1 = x_live([Src], Regs0), Regs = live_join_labels([Fail|List], D, Regs1), live_opt(Is, Regs, D, [I|Acc]); -live_opt([{select_tuple_arity,Src,Fail,{list,List}}=I|Is], Regs0, D, Acc) -> - Regs1 = x_live([Src], Regs0), - Regs = live_join_labels([Fail|List], D, Regs1), - live_opt(Is, Regs, D, [I|Acc]); -live_opt([{'try',_,Fail}=I|Is], Regs0, D, Acc) -> - Regs = live_join_label(Fail, D, Regs0), +live_opt([{'try',_,_}=I|Is], Regs, D, Acc) -> + %% If an exeption happens, all x registers will be killed. + %% Therefore, we should only base liveness of the code inside + %% the try. live_opt(Is, Regs, D, [I|Acc]); live_opt([{try_case,_}=I|Is], _, D, Acc) -> live_opt(Is, live_call(1), D, [I|Acc]); @@ -799,8 +740,6 @@ live_opt([timeout=I|Is], _, D, Acc) -> live_opt(Is, 0, D, [I|Acc]); %% Transparent instructions - they neither use nor modify x registers. -live_opt([{bs_put_string,_,_}=I|Is], Regs, D, Acc) -> - live_opt(Is, Regs, D, [I|Acc]); live_opt([{deallocate,_}=I|Is], Regs, D, Acc) -> live_opt(Is, Regs, D, [I|Acc]); live_opt([{kill,_}=I|Is], Regs, D, Acc) -> @@ -827,13 +766,24 @@ live_opt([{allocate_heap,_,_,Live}=I|Is], _, D, Acc) -> live_opt([], _, _, Acc) -> Acc. -live_opt_block([{set,[],[],{alloc,Live,_}}=I|Is], _, D, Acc) -> - live_opt_block(Is, live_call(Live), D, [I|Acc]); -live_opt_block([{set,Ds,Ss,Op}=I|Is], Regs0, D, Acc) -> - Regs = case Op of - {alloc,Live,_} -> live_call(Live); - _ -> x_live(Ss, x_dead(Ds, Regs0)) - end, +live_opt_block([{set,Ds,Ss,Op}=I0|Is], Regs0, D, Acc) -> + Regs1 = x_live(Ss, x_dead(Ds, Regs0)), + {I,Regs} = case Op of + {alloc,Live0,Alloc} -> + %% The life-time analysis used by the code generator + %% is sometimes too conservative, so it may be + %% possible to lower the number of live registers + %% based on the exact liveness information. + %% The main benefit is that more optimizations that + %% depend on liveness information (such as the + %% beam_bool and beam_dead passes) may be applied. + Live = live_regs(Regs1), + true = Live =< Live0, %Assertion. + I1 = {set,Ds,Ss,{alloc,Live,Alloc}}, + {I1,live_call(Live)}; + _ -> + {I0,Regs1} + end, case Ds of [{x,X}] -> case (not is_live(X, Regs0)) andalso Op =:= move of diff --git a/lib/compiler/src/beam_z.erl b/lib/compiler/src/beam_z.erl new file mode 100644 index 0000000000..8c6b0c916d --- /dev/null +++ b/lib/compiler/src/beam_z.erl @@ -0,0 +1,79 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2012. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Purpose: Run right before beam_asm to do any final fix-ups or clean-ups. +%% (Mandatory.) + +-module(beam_z). + +-export([module/2]). + +module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> + Fs = [function(F) || F <- Fs0], + {ok,{Mod,Exp,Attr,Fs,Lc}}. + +function({function,Name,Arity,CLabel,Is0}) -> + try + Is = undo_renames(Is0), + {function,Name,Arity,CLabel,Is} + catch + Class:Error -> + Stack = erlang:get_stacktrace(), + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +undo_renames([{call_ext,2,send}|Is]) -> + [send|undo_renames(Is)]; +undo_renames([{apply,A},{deallocate,N},return|Is]) -> + [{apply_last,A,N}|undo_renames(Is)]; +undo_renames([{call,A,F},{deallocate,N},return|Is]) -> + [{call_last,A,F,N}|undo_renames(Is)]; +undo_renames([{call_ext,A,F},{deallocate,N},return|Is]) -> + [{call_ext_last,A,F,N}|undo_renames(Is)]; +undo_renames([{call,A,F},return|Is]) -> + [{call_only,A,F}|undo_renames(Is)]; +undo_renames([{call_ext,A,F},return|Is]) -> + [{call_ext_only,A,F}|undo_renames(Is)]; +undo_renames([I|Is]) -> + [undo_rename(I)|undo_renames(Is)]; +undo_renames([]) -> []. + +undo_rename({bs_put,F,{I,U,Fl},[Sz,Src]}) -> + {I,F,Sz,U,Fl,Src}; +undo_rename({bs_put,F,{I,Fl},[Src]}) -> + {I,F,Fl,Src}; +undo_rename({bs_put,{f,0},{bs_put_string,_,_}=I,[]}) -> + I; +undo_rename({bif,bs_add=I,F,[Src1,Src2,{integer,U}],Dst}) -> + {I,F,[Src1,Src2,U],Dst}; +undo_rename({bif,bs_utf8_size=I,F,[Src],Dst}) -> + {I,F,Src,Dst}; +undo_rename({bif,bs_utf16_size=I,F,[Src],Dst}) -> + {I,F,Src,Dst}; +undo_rename({bs_init,F,{I,U,Flags},none,[Sz,Src],Dst}) -> + {I,F,Sz,U,Src,Flags,Dst}; +undo_rename({bs_init,F,{I,Extra,Flags},Live,[Sz],Dst}) -> + {I,F,Sz,Extra,Live,Flags,Dst}; +undo_rename({bs_init,F,{I,Extra,U,Flags},Live,[Sz,Src],Dst}) -> + {I,F,Sz,Extra,Live,U,Src,Flags,Dst}; +undo_rename({bs_init,_,bs_init_writable=I,_,_,_}) -> + I; +undo_rename({select,I,Reg,Fail,List}) -> + {I,Reg,Fail,{list,List}}; +undo_rename(I) -> I. diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index 0a368df5d6..df1af36eeb 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -224,6 +224,8 @@ format_error({delete_temp,File,Error}) -> [File,file:format_error(Error)]); format_error({parse_transform,M,R}) -> io_lib:format("error in parse transform '~s': ~p", [M, R]); +format_error({undef_parse_transform,M}) -> + io_lib:format("undefined parse transform '~s'", [M]); format_error({core_transform,M,R}) -> io_lib:format("error in core transform '~s': ~p", [M, R]); format_error({crash,Pass,Reason}) -> @@ -551,12 +553,12 @@ select_list_passes_1([{iff,Flag,{done,Ext}}|Ps], Opts, Acc) -> end; select_list_passes_1([{iff=Op,Flag,List0}|Ps], Opts, Acc) when is_list(List0) -> case select_list_passes(List0, Opts) of - {done,_}=Done -> Done; + {done,List} -> {done,reverse(Acc) ++ List}; {not_done,List} -> select_list_passes_1(Ps, Opts, [{Op,Flag,List}|Acc]) end; select_list_passes_1([{unless=Op,Flag,List0}|Ps], Opts, Acc) when is_list(List0) -> case select_list_passes(List0, Opts) of - {done,_}=Done -> Done; + {done,List} -> {done,reverse(Acc) ++ List}; {not_done,List} -> select_list_passes_1(Ps, Opts, [{Op,Flag,List}|Acc]) end; select_list_passes_1([P|Ps], Opts, Acc) -> @@ -630,7 +632,8 @@ kernel_passes() -> asm_passes() -> %% Assembly level optimisations. [{delay, - [{unless,no_postopt, + [{pass,beam_a}, + {unless,no_postopt, [{pass,beam_block}, {iff,dblk,{listing,"block"}}, {unless,no_except,{pass,beam_except}}, @@ -657,13 +660,11 @@ asm_passes() -> {iff,dtrim,{listing,"trim"}}, {pass,beam_flatten}]}, - %% If post optimizations are turned off, we still coalesce - %% adjacent labels and remove unused labels to keep the - %% HiPE compiler happy. - {iff,no_postopt, - [?pass(beam_unused_labels), - {pass,beam_clean}]}, + %% If post optimizations are turned off, we still + %% need to do a few clean-ups to code. + {iff,no_postopt,[{pass,beam_clean}]}, + {pass,beam_z}, {iff,dopt,{listing,"optimize"}}, {iff,'S',{listing,"S"}}, {iff,'to_asm',{done,"S"}}]}, @@ -850,6 +851,10 @@ foldl_transform(St, [T|Ts]) -> {error,Es,Ws} -> {error,St#compile{warnings=St#compile.warnings ++ Ws, errors=St#compile.errors ++ Es}}; + {'EXIT',{undef,_}} -> + Es = [{St#compile.ifile,[{none,compile, + {undef_parse_transform,T}}]}], + {error,St#compile{errors=St#compile.errors ++ Es}}; {'EXIT',R} -> Es = [{St#compile.ifile,[{none,compile,{parse_transform,T,R}}]}], {error,St#compile{errors=St#compile.errors ++ Es}}; @@ -1236,10 +1241,6 @@ random_bytes_1(N, Acc) -> random_bytes_1(N-1, [random:uniform(255)|Acc]). save_core_code(St) -> {ok,St#compile{core_code=cerl:from_records(St#compile.code)}}. -beam_unused_labels(#compile{code=Code0}=St) -> - Code = beam_jump:module_labels(Code0), - {ok,St#compile{code=Code}}. - beam_asm(#compile{ifile=File,code=Code0, abstract_code=Abst,mod_options=Opts0}=St) -> Source = filename:absname(File), diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src index 1133882728..94c78e68f9 100644 --- a/lib/compiler/src/compiler.app.src +++ b/lib/compiler/src/compiler.app.src @@ -20,6 +20,7 @@ [{description, "ERTS CXC 138 10"}, {vsn, "%VSN%"}, {modules, [ + beam_a, beam_asm, beam_block, beam_bool, @@ -40,6 +41,7 @@ beam_type, beam_utils, beam_validator, + beam_z, cerl, cerl_clauses, cerl_inline, diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl index be15495672..3b73269545 100644 --- a/lib/compiler/src/v3_codegen.erl +++ b/lib/compiler/src/v3_codegen.erl @@ -123,15 +123,24 @@ cg_fun(Les, Hvs, Vdb, AtomMod, NameArity, Anno, St0) -> put_reg(V, Reg) end, [], Hvs), stk=[]}, 0, Vdb), - {B,_Aft,St} = cg_list(Les, 0, Vdb, Bef, + {B0,_Aft,St} = cg_list(Les, 0, Vdb, Bef, St3#cg{bfail=0, ultimate_failure=UltimateMatchFail, is_top_block=true}), + B = fix_bs_match_strings(B0), {Name,Arity} = NameArity, Asm = [{label,Fi},line(Anno),{func_info,AtomMod,{atom,Name},Arity}, {label,Fl}|B++[{label,UltimateMatchFail},if_end]], {Asm,Fl,St}. +fix_bs_match_strings([{test,bs_match_string,F,[Ctx,BinList]}|Is]) + when is_list(BinList) -> + I = {test,bs_match_string,F,[Ctx,list_to_bitstring(BinList)]}, + [I|fix_bs_match_strings(Is)]; +fix_bs_match_strings([I|Is]) -> + [I|fix_bs_match_strings(Is)]; +fix_bs_match_strings([]) -> []. + %% cg(Lkexpr, Vdb, StackReg, State) -> {[Ainstr],StackReg,State}. %% Generate code for a kexpr. %% Split function into two steps for clarity, not efficiency. @@ -713,7 +722,22 @@ select_bin_seg(#l{ke={val_clause,{bin_int,Ctx,Sz,U,Fs,Val,Es},B},i=I,vdb=Vdb}, I, Vdb, Bef, Ctx, St0), {Bis,Aft,St2} = match_cg(B, Fail, Int, St1), CtxReg = fetch_var(Ctx, Bef), - {[{bs_restore2,CtxReg,{Ctx,Ivar}}|Mis] ++ Bis,Aft,St2}. + Is = case Mis ++ Bis of + [{test,bs_match_string,F,[OtherCtx,Bin1]}, + {bs_save2,OtherCtx,_}, + {bs_restore2,OtherCtx,_}, + {test,bs_match_string,F,[OtherCtx,Bin2]}|Is0] -> + %% We used to do this optimization later, but it + %% turns out that in huge functions with many + %% bs_match_string instructions, it's a big win + %% to do the combination now. To avoid copying the + %% binary data again and again, we'll combine bitstrings + %% in a list and convert all of it to a bitstring later. + [{test,bs_match_string,F,[OtherCtx,[Bin1,Bin2]]}|Is0]; + Is0 -> + Is0 + end, + {[{bs_restore2,CtxReg,{Ctx,Ivar}}|Is],Aft,St2}. select_extract_int([{var,Tl}], Val, {integer,Sz}, U, Fs, Vf, I, Vdb, Bef, Ctx, St) -> @@ -1385,22 +1409,32 @@ catch_cg(C, {var,R}, Le, Vdb, Bef, St0) -> %% %% put_list for constructing a cons is an atomic instruction %% which can safely resuse one of the source registers as target. -%% Also binaries can reuse a source register as target. set_cg([{var,R}], {cons,Es}, Le, Vdb, Bef, St) -> - [S1,S2] = map(fun ({var,V}) -> fetch_var(V, Bef); - (Other) -> Other - end, Es), + [S1,S2] = cg_reg_args(Es, Bef), Int0 = clear_dead(Bef, Le#l.i, Vdb), Int1 = Int0#sr{reg=put_reg(R, Int0#sr.reg)}, Ret = fetch_reg(R, Int1#sr.reg), {[{put_list,S1,S2,Ret}], Int1, St}; set_cg([{var,R}], {binary,Segs}, Le, Vdb, Bef, #cg{in_catch=InCatch, bfail=Bfail}=St) -> + %% At run-time, binaries are constructed in three stages: + %% 1) First the size of the binary is calculated. + %% 2) Then the binary is allocated. + %% 3) Then each field in the binary is constructed. + %% For simplicity, we use the target register to also hold the + %% size of the binary. Therefore the target register must *not* + %% be one of the source registers. + + %% First allocate the target register. Int0 = Bef#sr{reg=put_reg(R, Bef#sr.reg)}, Target = fetch_reg(R, Int0#sr.reg), - Fail = {f,Bfail}, + + %% Also allocate a scratch register for size calculations. Temp = find_scratch_reg(Int0#sr.reg), + + %% First generate the code that constructs each field. + Fail = {f,Bfail}, PutCode = cg_bin_put(Segs, Fail, Bef), {Sis,Int1} = case InCatch of @@ -1409,6 +1443,8 @@ set_cg([{var,R}], {binary,Segs}, Le, Vdb, Bef, end, MaxRegs = max_reg(Bef#sr.reg), Aft = clear_dead(Int1, Le#l.i, Vdb), + + %% Now generate the complete code for constructing the binary. Code = cg_binary(PutCode, Target, Temp, Fail, MaxRegs, Le#l.a), {Sis++Code,Aft,St}; set_cg([{var,R}], Con, Le, Vdb, Bef, St) -> @@ -1418,10 +1454,8 @@ set_cg([{var,R}], Con, Le, Vdb, Bef, St) -> Ais = case Con of {tuple,Es} -> [{put_tuple,length(Es),Ret}] ++ cg_build_args(Es, Bef); - {var,V} -> % Normally removed by kernel optimizer. - [{move,fetch_var(V, Int),Ret}]; Other -> - [{move,Other,Ret}] + [{move,cg_reg_arg(Other, Int),Ret}] end, {Ais,clear_dead(Int, Le#l.i, Vdb),St}. @@ -1575,8 +1609,7 @@ cg_gen_binsize([], _, _, _, _, Acc) -> Acc. %% cg_bin_opt(Code0) -> Code %% Optimize the size calculations for binary construction. -cg_bin_opt([{move,Size,D},{bs_append,Fail,D,Extra,Regs0,U,Bin,Flags,D}|Is]) -> - Regs = cg_bo_newregs(Regs0, D), +cg_bin_opt([{move,Size,D},{bs_append,Fail,D,Extra,Regs,U,Bin,Flags,D}|Is]) -> cg_bin_opt([{bs_append,Fail,Size,Extra,Regs,U,Bin,Flags,D}|Is]); cg_bin_opt([{move,Size,D},{bs_private_append,Fail,D,U,Bin,Flags,D}|Is]) -> cg_bin_opt([{bs_private_append,Fail,Size,U,Bin,Flags,D}|Is]); @@ -1584,9 +1617,8 @@ cg_bin_opt([{move,{integer,0},D},{bs_add,_,[D,{integer,_}=S,1],Dst}|Is]) -> cg_bin_opt([{move,S,Dst}|Is]); cg_bin_opt([{move,{integer,0},D},{bs_add,Fail,[D,S,U],Dst}|Is]) -> cg_bin_opt([{bs_add,Fail,[{integer,0},S,U],Dst}|Is]); -cg_bin_opt([{move,{integer,Bytes},D},{Op,Fail,D,Extra,Regs0,Flags,D}|Is]) +cg_bin_opt([{move,{integer,Bytes},D},{Op,Fail,D,Extra,Regs,Flags,D}|Is]) when Op =:= bs_init2; Op =:= bs_init_bits -> - Regs = cg_bo_newregs(Regs0, D), cg_bin_opt([{Op,Fail,Bytes,Extra,Regs,Flags,D}|Is]); cg_bin_opt([{move,Src1,Dst},{bs_add,Fail,[Dst,Src2,U],Dst}|Is]) -> cg_bin_opt([{bs_add,Fail,[Src1,Src2,U],Dst}|Is]); @@ -1594,20 +1626,9 @@ cg_bin_opt([I|Is]) -> [I|cg_bin_opt(Is)]; cg_bin_opt([]) -> []. -cg_bo_newregs(R, {x,X}) when R-1 =:= X -> R-1; -cg_bo_newregs(R, _) -> R. - -%% Common for new and old binary code generation. - cg_bin_put({bin_seg,[],S0,U,T,Fs,[E0,Next]}, Fail, Bef) -> - S1 = case S0 of - {var,Sv} -> fetch_var(Sv, Bef); - _ -> S0 - end, - E1 = case E0 of - {var,V} -> fetch_var(V, Bef); - Other -> Other - end, + S1 = cg_reg_arg(S0, Bef), + E1 = cg_reg_arg(E0, Bef), {Format,Op} = case T of integer -> {plain,bs_put_integer}; utf8 -> {utf,bs_put_utf8}; @@ -1625,9 +1646,7 @@ cg_bin_put({bin_seg,[],S0,U,T,Fs,[E0,Next]}, Fail, Bef) -> cg_bin_put({bin_end,[]}, _, _) -> []. cg_build_args(As, Bef) -> - map(fun ({var,V}) -> {put,fetch_var(V, Bef)}; - (Other) -> {put,Other} - end, As). + [{put,cg_reg_arg(A, Bef)} || A <- As]. %% return_cg([Val], Le, Vdb, Bef, St) -> {[Ainstr],Aft,St}. %% break_cg([Val], Le, Vdb, Bef, St) -> {[Ainstr],Aft,St}. @@ -1906,27 +1925,13 @@ fetch_var(V, Sr) -> error -> fetch_stack(V, Sr#sr.stk) end. -% find_var(V, Sr) -> -% case find_reg(V, Sr#sr.reg) of -% {ok,R} -> {ok,R}; -% error -> -% case find_stack(V, Sr#sr.stk) of -% {ok,S} -> {ok,S}; -% error -> error -% end -% end. - load_vars(Vs, Regs) -> foldl(fun ({var,V}, Rs) -> put_reg(V, Rs) end, Regs, Vs). %% put_reg(Val, Regs) -> Regs. -%% free_reg(Val, Regs) -> Regs. %% find_reg(Val, Regs) -> ok{r{R}} | error. %% fetch_reg(Val, Regs) -> r{R}. %% Functions to interface the registers. -%% put_reg puts a value into a free register, -%% load_reg loads a value into a fixed register -%% free_reg frees a register containing a specific value. % put_regs(Vs, Rs) -> foldl(fun put_reg/2, Rs, Vs). @@ -1937,10 +1942,6 @@ put_reg_1(V, [{reserved,I,V}|Rs], I) -> [{I,V}|Rs]; put_reg_1(V, [R|Rs], I) -> [R|put_reg_1(V, Rs, I+1)]; put_reg_1(V, [], I) -> [{I,V}]. -% free_reg(V, [{I,V}|Rs]) -> [free|Rs]; -% free_reg(V, [R|Rs]) -> [R|free_reg(V, Rs)]; -% free_reg(V, []) -> []. - fetch_reg(V, [{I,V}|_]) -> {x,I}; fetch_reg(V, [_|SRs]) -> fetch_reg(V, SRs). @@ -1957,9 +1958,6 @@ find_scratch_reg([free|_], I) -> {x,I}; find_scratch_reg([_|Rs], I) -> find_scratch_reg(Rs, I+1); find_scratch_reg([], I) -> {x,I}. -%%copy_reg(Val, R, Regs) -> load_reg(Val, R, Regs). -%%move_reg(Val, R, Regs) -> load_reg(Val, R, free_reg(Val, Regs)). - replace_reg_contents(Old, New, [{I,Old}|Rs]) -> [{I,New}|Rs]; replace_reg_contents(Old, New, [R|Rs]) -> [R|replace_reg_contents(Old, New, Rs)]. diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl index b184987625..8ef71e1346 100644 --- a/lib/compiler/src/v3_kernel.erl +++ b/lib/compiler/src/v3_kernel.erl @@ -81,7 +81,7 @@ -export([module/2,format_error/1]). -import(lists, [map/2,foldl/3,foldr/3,mapfoldl/3,splitwith/2,member/2, - keymember/3,keyfind/3]). + keymember/3,keyfind/3,partition/2]). -import(ordsets, [add_element/2,del_element/2,union/2,union/1,subtract/2]). -import(cerl, [c_tuple/1]). @@ -1081,9 +1081,44 @@ select_bin_con(Cs0) -> end, Cs0), select_bin_con_1(Cs1). + select_bin_con_1(Cs) -> try - select_bin_int(Cs) + %% The usual way to match literals is to first extract the + %% value to a register, and then compare the register to the + %% literal value. Extracting the value is good if we need + %% compare it more than once. + %% + %% But we would like to combine the extracting and the + %% comparing into a single instruction if we know that + %% a binary segment must contain specific integer value + %% or the matching will fail, like in this example: + %% + %% <<42:8,...>> -> + %% <<42:8,...>> -> + %% . + %% . + %% . + %% <<42:8,...>> -> + %% <<>> -> + %% + %% The first segment must either contain the integer 42 + %% or the binary must end for the match to succeed. + %% + %% The way we do is to replace the generic #k_bin_seg{} + %% record with a #k_bin_int{} record if all clauses will + %% select the same literal integer (except for one or more + %% clauses that will end the binary). + + {BinSegs0,BinEnd} = + partition(fun (C) -> + clause_con(C) =:= k_bin_seg + end, Cs), + BinSegs = select_bin_int(BinSegs0), + case BinEnd of + [] -> BinSegs; + [_|_] -> BinSegs ++ [{k_bin_end,BinEnd}] + end catch throw:not_possible -> select_bin_con_2(Cs) @@ -1097,7 +1132,7 @@ select_bin_con_2([]) -> []. %% select_bin_int([Clause]) -> {k_bin_int,[Clause]} %% If the first pattern in each clause selects the same integer, -%% rewrite all clauses to use #k_bin_int{} (which will later to +%% rewrite all clauses to use #k_bin_int{} (which will later be %% translated to a bs_match_string/4 instruction). %% %% If it is not possible to do this rewrite, a 'not_possible' @@ -1346,7 +1381,7 @@ clause_arg(#iclause{pats=[Arg|_]}) -> Arg. clause_con(C) -> arg_con(clause_arg(C)). -clause_val(C) -> arg_val(clause_arg(C)). +clause_val(C) -> arg_val(clause_arg(C), C). is_var_clause(C) -> clause_con(C) =:= k_var. @@ -1377,7 +1412,7 @@ arg_con(Arg) -> #k_var{} -> k_var end. -arg_val(Arg) -> +arg_val(Arg, C) -> case arg_arg(Arg) of #k_literal{val=Lit} -> Lit; #k_int{val=I} -> I; @@ -1385,7 +1420,13 @@ arg_val(Arg) -> #k_atom{val=A} -> A; #k_tuple{es=Es} -> length(Es); #k_bin_seg{size=S,unit=U,type=T,flags=Fs} -> - {set_kanno(S, []),U,T,Fs} + case S of + #k_var{name=V} -> + #iclause{isub=Isub} = C, + {#k_var{name=get_vsub(V, Isub)},U,T,Fs}; + _ -> + {set_kanno(S, []),U,T,Fs} + end end. %% ubody_used_vars(Expr, State) -> [UsedVar] |