aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2018-02-01 08:33:10 +0100
committerBjörn Gustavsson <[email protected]>2018-08-24 09:57:06 +0200
commit6bee2ac7d11668888d93ec4f93730bcae3e5fa79 (patch)
treedf6c6be429ccacfa9c1cf7ea07f890892f73b461 /lib/compiler
parent004257f6fc1ea9efea1c99a93211e2f39b1d14ad (diff)
downloadotp-6bee2ac7d11668888d93ec4f93730bcae3e5fa79.tar.gz
otp-6bee2ac7d11668888d93ec4f93730bcae3e5fa79.tar.bz2
otp-6bee2ac7d11668888d93ec4f93730bcae3e5fa79.zip
Introduce a new SSA-based intermediate format
v3_codegen is replaced by three new passes: * beam_kernel_to_ssa which translates the Kernel Erlang format to a new SSA-based intermediate format. * beam_ssa_pre_codegen which prepares the SSA-based format for code generation, including register allocation. Registers are allocated using the linear scan algorithm. * beam_ssa_codegen which generates BEAM assembly code from the SSA-based format. It easier and more effective to optimize the SSA-based format before X and Y registers have been assigned. The current optimization passes constantly have to make sure no "holes" in the X register assignments are created (that is, that no X register becomes undefined that an allocation instruction depends on). This commit also introduces the following optimizations: * Replacing of tuple matching of records with the is_tagged_tuple instruction. (Replacing beam_record.) * Sinking of get_tuple_element instructions to just before the first use of the extracted values. As well as potentially avoiding extracting tuple elements when they are not actually used on all executions paths, this optimization could also reduce the number values that will need to be stored in Y registers. (Similar to beam_reorder, but more effective.) * Live optimizations, removing the definition of a variable that is not subsequently used (provided that the operation has no side effects), as well strength reduction of binary matching by replacing the extraction of value from a binary with a skip instruction. (Used to be done by beam_block, beam_utils, and v3_codegen.) * Removal of redundant bs_restore2 instructions. (Formerly done by beam_bs.) * Type-based optimizations across branches. More effective than the old beam_type pass that only did type-based optimizations in basic blocks. * Optimization of floating point instructions. (Formerly done by beam_type.) * Optimization of receive statements to introduce recv_mark and recv_set instructions. More effective with far fewer restrictions on what instructions are allowed between creating the reference and entering the receive statement. * Common subexpression elimination. (Formerly done by beam_block.)
Diffstat (limited to 'lib/compiler')
-rw-r--r--lib/compiler/src/Makefile22
-rw-r--r--lib/compiler/src/beam_a.erl17
-rw-r--r--lib/compiler/src/beam_block.erl16
-rw-r--r--lib/compiler/src/beam_clean.erl143
-rw-r--r--lib/compiler/src/beam_dead.erl20
-rw-r--r--lib/compiler/src/beam_flatten.erl4
-rw-r--r--lib/compiler/src/beam_jump.erl6
-rw-r--r--lib/compiler/src/beam_kernel_to_ssa.erl1330
-rw-r--r--lib/compiler/src/beam_listing.erl7
-rw-r--r--lib/compiler/src/beam_peep.erl69
-rw-r--r--lib/compiler/src/beam_split.erl4
-rw-r--r--lib/compiler/src/beam_ssa.erl616
-rw-r--r--lib/compiler/src/beam_ssa.hrl66
-rw-r--r--lib/compiler/src/beam_ssa_codegen.erl1827
-rw-r--r--lib/compiler/src/beam_ssa_lint.erl349
-rw-r--r--lib/compiler/src/beam_ssa_opt.erl1307
-rw-r--r--lib/compiler/src/beam_ssa_pp.erl238
-rw-r--r--lib/compiler/src/beam_ssa_pre_codegen.erl2437
-rw-r--r--lib/compiler/src/beam_ssa_recv.erl281
-rw-r--r--lib/compiler/src/beam_ssa_type.erl1106
-rw-r--r--lib/compiler/src/beam_utils.erl3
-rw-r--r--lib/compiler/src/compile.erl116
-rw-r--r--lib/compiler/src/compiler.app.src10
-rw-r--r--lib/compiler/src/v3_kernel.erl11
-rw-r--r--lib/compiler/src/v3_kernel.hrl2
-rw-r--r--lib/compiler/test/Makefile8
-rw-r--r--lib/compiler/test/beam_ssa_SUITE.erl290
-rw-r--r--lib/compiler/test/beam_type_SUITE.erl54
-rw-r--r--lib/compiler/test/bs_match_SUITE.erl44
-rw-r--r--lib/compiler/test/compilation_SUITE.erl6
-rw-r--r--lib/compiler/test/compile_SUITE.erl27
-rw-r--r--lib/compiler/test/core_fold_SUITE.erl2
-rw-r--r--lib/compiler/test/guard_SUITE.erl15
-rw-r--r--lib/compiler/test/inline_SUITE.erl12
-rw-r--r--lib/compiler/test/misc_SUITE.erl53
-rw-r--r--lib/compiler/test/receive_SUITE_data/ref_opt/no_5.erl38
-rw-r--r--lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.S71
-rw-r--r--lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.erl27
-rw-r--r--lib/compiler/test/test_lib.erl8
39 files changed, 10342 insertions, 320 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index 08042aeba4..f036a2d8dc 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -62,6 +62,15 @@ MODULES = \
beam_opcodes \
beam_peep \
beam_split \
+ beam_ssa \
+ beam_ssa_codegen \
+ beam_ssa_lint \
+ beam_ssa_opt \
+ beam_ssa_pp \
+ beam_ssa_pre_codegen \
+ beam_ssa_recv \
+ beam_ssa_type \
+ beam_kernel_to_ssa \
beam_trim \
beam_utils \
beam_validator \
@@ -86,7 +95,6 @@ MODULES = \
sys_core_fold_lists \
sys_core_inline \
sys_pre_attributes \
- v3_codegen \
v3_core \
v3_kernel \
v3_kernel_pp
@@ -95,6 +103,7 @@ BEAM_H = $(wildcard ../priv/beam_h/*.h)
HRL_FILES= \
beam_disasm.hrl \
+ beam_ssa.hrl \
core_parse.hrl \
v3_kernel.hrl
@@ -181,7 +190,16 @@ release_docs_spec:
# ----------------------------------------------------
$(EBIN)/beam_disasm.beam: $(EGEN)/beam_opcodes.hrl beam_disasm.hrl
-$(EBIN)/beam_listing.beam: core_parse.hrl v3_kernel.hrl
+$(EBIN)/beam_listing.beam: core_parse.hrl v3_kernel.hrl beam_ssa.hrl
+$(EBIN)/beam_kernel_to_ssa.beam: v3_kernel.hrl beam_ssa.hrl
+$(EBIN)/beam_ssa.beam: beam_ssa.hrl
+$(EBIN)/beam_ssa_codegen.beam: beam_ssa.hrl
+$(EBIN)/beam_ssa_lint.beam: beam_ssa.hrl
+$(EBIN)/beam_ssa_opt.beam: beam_ssa.hrl
+$(EBIN)/beam_ssa_pp.beam: beam_ssa.hrl
+$(EBIN)/beam_ssa_pre_codegen.beam: beam_ssa.hrl
+$(EBIN)/beam_ssa_recv.beam: beam_ssa.hrl
+$(EBIN)/beam_ssa_type.beam: beam_ssa.hrl
$(EBIN)/cerl.beam: core_parse.hrl
$(EBIN)/compile.beam: core_parse.hrl ../../stdlib/include/erl_compile.hrl
$(EBIN)/core_lib.beam: core_parse.hrl
diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl
index a26f245446..266e8f46c8 100644
--- a/lib/compiler/src/beam_a.erl
+++ b/lib/compiler/src/beam_a.erl
@@ -39,8 +39,13 @@ function({function,Name,Arity,CLabel,Is0}) ->
%% Remove unusued labels for cleanliness and to help
%% optimization passes and HiPE.
- Is = beam_jump:remove_unused_labels(Is1),
- {function,Name,Arity,CLabel,Is}
+ Is2 = beam_jump:remove_unused_labels(Is1),
+
+ %% Some optimization passes can't handle consecutive labels.
+ %% Coalesce multiple consecutive labels.
+ Is = coalesce_consecutive_labels(Is2, [], []),
+
+ {function,Name,Arity,CLabel,Is}
catch
Class:Error:Stack ->
io:fwrite("Function: ~w/~w\n", [Name,Arity]),
@@ -122,3 +127,11 @@ rename_instr({select_tuple_arity=I,Reg,Fail,{list,List}}) ->
rename_instr(send) ->
{call_ext,2,send};
rename_instr(I) -> I.
+
+coalesce_consecutive_labels([{label,L}=Lbl,{label,Alias}|Is], Replace, Acc) ->
+ coalesce_consecutive_labels([Lbl|Is], [{Alias,L}|Replace], Acc);
+coalesce_consecutive_labels([I|Is], Replace, Acc) ->
+ coalesce_consecutive_labels(Is, Replace, [I|Acc]);
+coalesce_consecutive_labels([], Replace, Acc) ->
+ D = maps:from_list(Replace),
+ beam_utils:replace_labels(Acc, [], D, fun(L) -> L end).
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 3e28ff2c01..c928fc7187 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -17,13 +17,12 @@
%%
%% %CopyrightEnd%
%%
-%% Purpose : Partitions assembly instructions into basic blocks and
-%% optimizes them.
+%% Purpose: Partition BEAM instructions into basic blocks.
-module(beam_block).
-export([module/2]).
--import(lists, [reverse/1]).
+-import(lists, [reverse/1,splitwith/2]).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
@@ -34,11 +33,8 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opts) ->
function({function,Name,Arity,CLabel,Is0}) ->
try
- %% Collect basic blocks and optimize them.
Is1 = blockify(Is0),
Is = embed_lines(Is1),
-
- %% Done.
{function,Name,Arity,CLabel,Is}
catch
Class:Error:Stack ->
@@ -69,12 +65,10 @@ collect_block(Is) ->
collect_block(Is, []).
collect_block([{allocate,N,R}|Is0], Acc) ->
- {Inits,Is} = lists:splitwith(fun ({init,{y,_}}) -> true;
- (_) -> false
- end, Is0),
+ {Inits,Is} = splitwith(fun ({init,{y,_}}) -> true;
+ (_) -> false
+ end, Is0),
collect_block(Is, [{set,[],[],{alloc,R,{nozero,N,0,Inits}}}|Acc]);
-collect_block([{allocate_zero,Ns,R},{test_heap,Nh,R}|Is], Acc) ->
- collect_block(Is, [{set,[],[],{alloc,R,{zero,Ns,Nh,[]}}}|Acc]);
collect_block([I|Is]=Is0, Acc) ->
case collect(I) of
error -> {reverse(Acc),Is0};
diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl
index 5a9023c259..f5f0ac2218 100644
--- a/lib/compiler/src/beam_clean.erl
+++ b/lib/compiler/src/beam_clean.erl
@@ -23,7 +23,7 @@
-export([module/2]).
-export([clean_labels/1]).
--import(lists, [foldl/3,reverse/1]).
+-import(lists, [foldl/3]).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
@@ -36,8 +36,7 @@ module({Mod,Exp,Attr,Fs0,_}, Opts) ->
Used = find_all_used(WorkList, All, sets:from_list(WorkList)),
Fs1 = remove_unused(Order, Used, All),
{Fs2,Lc} = clean_labels(Fs1),
- Fs3 = bs_fix(Fs2),
- Fs = maybe_remove_lines(Fs3, Opts),
+ Fs = maybe_remove_lines(Fs2, Opts),
{ok,{Mod,Exp,Attr,Fs,Lc}}.
%% Determine the rootset, i.e. exported functions and
@@ -87,12 +86,8 @@ add_to_work_list(F, {Fs,Used}=Sets) ->
%%%
%%% Coalesce adjacent labels. Renumber all labels to eliminate gaps.
-%%% This cleanup will slightly reduce file size and slightly speed up loading.
-%%%
-%%% We also expand is_record/3 to a sequence of instructions. It is done
-%%% here merely because this module will always be called even if optimization
-%%% is turned off. We don't want to do the expansion in beam_asm because we
-%%% want to see the expanded code in a .S file.
+%%% This cleanup will slightly reduce file size and slightly speed up
+%%% loading.
%%%
-type label() :: beam_asm:label().
@@ -117,45 +112,6 @@ function_renumber([{function,Name,Arity,_Entry,Asm0}|Fs], St0, Acc) ->
function_renumber(Fs, St, [{function,Name,Arity,St#st.entry,Asm}|Acc]);
function_renumber([], St, Acc) -> {Acc,St}.
-renumber_labels([{bif,is_record,{f,_},
- [Term,{atom,Tag}=TagAtom,{integer,Arity}],Dst}|Is0], Acc, St) ->
- ContLabel = 900000000+2*St#st.lc,
- FailLabel = ContLabel+1,
- Fail = {f,FailLabel},
- Tmp = Dst,
- Is = case is_record_tuple(Term, Tag, Arity) of
- yes ->
- [{move,{atom,true},Dst}|Is0];
- no ->
- [{move,{atom,false},Dst}|Is0];
- maybe ->
- [{test,is_tuple,Fail,[Term]},
- {test,test_arity,Fail,[Term,Arity]},
- {get_tuple_element,Term,0,Tmp},
- {test,is_eq_exact,Fail,[Tmp,TagAtom]},
- {move,{atom,true},Dst},
- {jump,{f,ContLabel}},
- {label,FailLabel},
- {move,{atom,false},Dst},
- {jump,{f,ContLabel}}, %Improves optimization by beam_dead.
- {label,ContLabel}|Is0]
- end,
- renumber_labels(Is, Acc, St);
-renumber_labels([{test,is_record,{f,_}=Fail,
- [Term,{atom,Tag}=TagAtom,{integer,Arity}]}|Is0], Acc, St) ->
- Tmp = {x,1022},
- Is = case is_record_tuple(Term, Tag, Arity) of
- yes ->
- Is0;
- no ->
- [{jump,Fail}|Is0];
- maybe ->
- [{test,is_tuple,Fail,[Term]},
- {test,test_arity,Fail,[Term,Arity]},
- {get_tuple_element,Term,0,Tmp},
- {test,is_eq_exact,Fail,[Tmp,TagAtom]}|Is0]
- end,
- renumber_labels(Is, Acc, St);
renumber_labels([{label,Old}|Is], [{label,New}|_]=Acc, #st{lmap=D0}=St) ->
D = [{Old,New}|D0],
renumber_labels(Is, Acc, St#st{lmap=D});
@@ -169,12 +125,6 @@ renumber_labels([I|Is], Acc, St0) ->
renumber_labels(Is, [I|Acc], St0);
renumber_labels([], Acc, St) -> {Acc,St}.
-is_record_tuple({x,_}, _, _) -> maybe;
-is_record_tuple({y,_}, _, _) -> maybe;
-is_record_tuple({literal,Tuple}, Tag, Arity)
- when element(1, Tuple) =:= Tag, tuple_size(Tuple) =:= Arity -> yes;
-is_record_tuple(_, _, _) -> no.
-
function_replace([{function,Name,Arity,Entry,Asm0}|Fs], Dict, Acc) ->
Asm = try
Fb = fun(Old) -> throw({error,{undefined_label,Old}}) end,
@@ -189,91 +139,6 @@ function_replace([{function,Name,Arity,Entry,Asm0}|Fs], Dict, Acc) ->
function_replace([], _, Acc) -> Acc.
%%%
-%%% Final fixup of bs_start_match2/5,bs_save2/bs_restore2 instructions for
-%%% new bit syntax matching (introduced in R11B).
-%%%
-%%% Pass 1: Scan the code, looking for bs_restore2/2 instructions.
-%%%
-%%% Pass 2: Update bs_save2/2 and bs_restore/2 instructions. Remove
-%%% any bs_save2/2 instruction whose save position are never referenced
-%%% by any bs_restore2/2 instruction.
-%%%
-%%% Note this module can be invoked several times, so we must be careful
-%%% not to touch instructions that have already been fixed up.
-%%%
-
-bs_fix(Fs) ->
- bs_fix(Fs, []).
-
-bs_fix([{function,Name,Arity,Entry,Asm0}|Fs], Acc) ->
- Asm = bs_function(Asm0),
- bs_fix(Fs, [{function,Name,Arity,Entry,Asm}|Acc]);
-bs_fix([], Acc) -> reverse(Acc).
-
-bs_function(Is) ->
- Dict0 = bs_restores(Is, []),
- S0 = sofs:relation(Dict0, [{context,save_point}]),
- S1 = sofs:relation_to_family(S0),
- S = sofs:to_external(S1),
- Dict = make_save_point_dict(S, []),
- bs_replace(Is, Dict, []).
-
-make_save_point_dict([{Ctx,Pts}|T], Acc0) ->
- Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0),
- make_save_point_dict(T, Acc);
-make_save_point_dict([], Acc) ->
- gb_trees:from_orddict(ordsets:from_list(Acc)).
-
-make_save_point_dict_1([H|T], Ctx, I, Acc) ->
- make_save_point_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
-make_save_point_dict_1([], Ctx, I, Acc) ->
- [{Ctx,I}|Acc].
-
-%% Pass 1.
-bs_restores([{bs_restore2,_,{Same,Same}}|Is], Dict) ->
- %% This save point is special. No explicit save is needed.
- bs_restores(Is, Dict);
-bs_restores([{bs_restore2,_,{atom,start}}|Is], Dict) ->
- %% This instruction can occur if "compilation"
- %% started from a .S file.
- bs_restores(Is, Dict);
-bs_restores([{bs_restore2,_,{_,_}=SavePoint}|Is], Dict) ->
- bs_restores(Is, [SavePoint|Dict]);
-bs_restores([_|Is], Dict) ->
- bs_restores(Is, Dict);
-bs_restores([], Dict) -> Dict.
-
-%% Pass 2.
-bs_replace([{test,bs_start_match2,F,Live,[Src,{context,Ctx}],CtxR}|T], Dict, Acc) ->
- Slots = case gb_trees:lookup(Ctx, Dict) of
- {value,Slots0} -> Slots0;
- none -> 0
- end,
- I = {test,bs_start_match2,F,Live,[Src,Slots],CtxR},
- bs_replace(T, Dict, [I|Acc]);
-bs_replace([{bs_save2,CtxR,{_,_}=SavePoint}|T], Dict, Acc) ->
- case gb_trees:lookup(SavePoint, Dict) of
- {value,N} ->
- bs_replace(T, Dict, [{bs_save2,CtxR,N}|Acc]);
- none ->
- bs_replace(T, Dict, Acc)
- end;
-bs_replace([{bs_restore2,_,{atom,start}}=I|T], Dict, Acc) ->
- %% This instruction can occur if "compilation"
- %% started from a .S file.
- bs_replace(T, Dict, [I|Acc]);
-bs_replace([{bs_restore2,CtxR,{Same,Same}}|T], Dict, Acc) ->
- %% This save point refers to the point in the binary where the match
- %% started. It has a special name.
- bs_replace(T, Dict, [{bs_restore2,CtxR,{atom,start}}|Acc]);
-bs_replace([{bs_restore2,CtxR,{_,_}=SavePoint}|T], Dict, Acc) ->
- N = gb_trees:get(SavePoint, Dict),
- bs_replace(T, Dict, [{bs_restore2,CtxR,N}|Acc]);
-bs_replace([I|Is], Dict, Acc) ->
- bs_replace(Is, Dict, [I|Acc]);
-bs_replace([], _, Acc) -> reverse(Acc).
-
-%%%
%%% Remove line instructions if requested.
%%%
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl
index 1004f6048b..546f0461b9 100644
--- a/lib/compiler/src/beam_dead.erl
+++ b/lib/compiler/src/beam_dead.erl
@@ -325,25 +325,11 @@ backward([{test,Op,{f,To0},Ops0}|Is], D, Acc) ->
is_eq_exact -> combine_eqs(To, Ops0, D, Acc);
_ -> {test,Op,{f,To},Ops0}
end,
- case {I,Acc} of
- {{test,is_atom,Fail,Ops0},[{test,is_boolean,Fail,Ops0}|_]} ->
- %% An is_atom test before an is_boolean test (with the
- %% same failure label) is redundant.
- backward(Is, D, Acc);
- {{test,is_atom,Fail,[R]},
- [{test,is_eq_exact,Fail,[R,{atom,_}]}|_]} ->
- %% An is_atom test before a comparison with an atom (with
- %% the same failure label) is redundant.
- backward(Is, D, Acc);
- {{test,is_integer,Fail,[R]},
- [{test,is_eq_exact,Fail,[R,{integer,_}]}|_]} ->
- %% An is_integer test before a comparison with an integer
- %% (with the same failure label) is redundant.
- backward(Is, D, Acc);
- {{test,_,_,_},_} ->
+ case I of
+ {test,_,_,_} ->
%% Still a test instruction. Done.
backward(Is, D, [I|Acc]);
- {_,_} ->
+ _ ->
%% Rewritten to a select_val. Rescan.
backward([I|Is], D, Acc)
end;
diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl
index 20bd23a912..9a1c5a1021 100644
--- a/lib/compiler/src/beam_flatten.erl
+++ b/lib/compiler/src/beam_flatten.erl
@@ -43,13 +43,15 @@ block([{block,Is0}|Is1], Acc) -> block(Is1, norm_block(Is0, Acc));
block([I|Is], Acc) -> block(Is, [I|Acc]);
block([], Acc) -> reverse(Acc).
-norm_block([{set,[],[],{alloc,R,Alloc}}|Is], Acc0) ->
+norm_block([{set,[],[],{alloc,R,{_,nostack,_,_}=Alloc}}|Is], Acc0) ->
case insert_alloc_in_bs_init(Acc0, Alloc) of
impossible ->
norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0));
Acc ->
norm_block(Is, Acc)
end;
+norm_block([{set,[],[],{alloc,R,Alloc}}|Is], Acc0) ->
+ norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0));
norm_block([{set,[D1],[S],get_hd},{set,[D2],[S],get_tl}|Is], Acc) ->
I = {get_list,S,D1,D2},
norm_block(Is, [I|Acc]);
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index 43084ad588..42b4cdaf4f 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -610,6 +610,12 @@ ulbl({put_map,Lbl,_Op,_Src,_Dst,_Live,_List}, Used) ->
mark_used(Lbl, Used);
ulbl({get_map_elements,Lbl,_Src,_List}, Used) ->
mark_used(Lbl, Used);
+ulbl({recv_mark,Lbl}, Used) ->
+ mark_used(Lbl, Used);
+ulbl({recv_set,Lbl}, Used) ->
+ mark_used(Lbl, Used);
+ulbl({fcheckerror,Lbl}, Used) ->
+ mark_used(Lbl, Used);
ulbl(_, Used) -> Used.
mark_used({f,0}, Used) -> Used;
diff --git a/lib/compiler/src/beam_kernel_to_ssa.erl b/lib/compiler/src/beam_kernel_to_ssa.erl
new file mode 100644
index 0000000000..c55a57ab32
--- /dev/null
+++ b/lib/compiler/src/beam_kernel_to_ssa.erl
@@ -0,0 +1,1330 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose: Convert the Kernel Erlang format to the SSA format.
+
+-module(beam_kernel_to_ssa).
+
+%% The main interface.
+-export([module/2]).
+
+-import(lists, [append/1,duplicate/2,flatmap/2,foldl/3,
+ keysort/2,mapfoldl/3,map/2,member/2,
+ reverse/1,reverse/2,sort/1]).
+
+-include("v3_kernel.hrl").
+-include("beam_ssa.hrl").
+
+-type label() :: beam_ssa:label().
+
+%% Main codegen structure.
+-record(cg, {lcount=1 :: label(), %Label counter
+ bfail=1 :: label(),
+ catch_label=none :: 'none' | label(),
+ vars=#{} :: map(), %Defined variables.
+ break=0 :: label(), %Break label
+ recv=0 :: label(), %Receive label
+ ultimate_failure=0 :: label() %Label for ultimate match failure.
+ }).
+
+%% Internal records.
+-record(cg_break, {args :: [beam_ssa:value()],
+ phi :: label()
+ }).
+-record(cg_phi, {vars :: [beam_ssa:b_var()]
+ }).
+-record(cg_unreachable, {}).
+
+-spec module(#k_mdef{}, [compile:option()]) -> {'ok',#b_module{}}.
+
+module(#k_mdef{name=Mod,exports=Es,attributes=Attr,body=Forms}, _Opts) ->
+ Body = functions(Forms, Mod),
+ Module = #b_module{name=Mod,exports=Es,attributes=Attr,body=Body},
+ {ok,Module}.
+
+functions(Forms, Mod) ->
+ [function(F, Mod) || F <- Forms].
+
+function(#k_fdef{anno=Anno0,func=Name,arity=Arity,
+ vars=As0,body=Kb}, Mod) ->
+ try
+ #k_match{} = Kb, %Assertion.
+
+ %% Generate the SSA form immediate format.
+ St0 = #cg{},
+ {As,St1} = new_ssa_vars(As0, St0),
+ {Asm,St} = cg_fun(Kb, St1),
+ Anno1 = line_anno(Anno0),
+ Anno = Anno1#{func_info=>{Mod,Name,Arity}},
+ #b_function{anno=Anno,args=As,bs=Asm,cnt=St#cg.lcount}
+ catch
+ Class:Error:Stack ->
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
+
+%% cg_fun([Lkexpr], [HeadVar], State) -> {[Ainstr],State}
+
+cg_fun(Ke, St0) ->
+ {UltimateFail,FailIs,St1} = make_failure(badarg, St0),
+ St2 = St1#cg{bfail=UltimateFail,ultimate_failure=UltimateFail},
+ {B,St} = cg(Ke, St2),
+ Asm = [{label,0}|B++FailIs],
+ finalize(Asm, St).
+
+make_failure(Reason, St0) ->
+ {Lbl,St1} = new_label(St0),
+ {Dst,St} = new_ssa_var('@ssa_ret', St1),
+ Is = [{label,Lbl},
+ #b_set{op=call,dst=Dst,
+ args=[#b_remote{mod=#b_literal{val=erlang},
+ name=#b_literal{val=error},
+ arity=1},
+ #b_literal{val=Reason}]},
+ #b_ret{arg=Dst}],
+ {Lbl,Is,St}.
+
+%% cg(Lkexpr, State) -> {[Ainstr],State}.
+%% Generate code for a kexpr.
+
+cg(#k_match{body=M,ret=Rs}, St) ->
+ do_match_cg(M, Rs, St);
+cg(#k_guard_match{body=M,ret=Rs}, St) ->
+ do_match_cg(M, Rs, St);
+cg(#k_seq{arg=Arg,body=Body}, St0) ->
+ {ArgIs,St1} = cg(Arg, St0),
+ {BodyIs,St} = cg(Body, St1),
+ {ArgIs++BodyIs,St};
+cg(#k_call{anno=Le,op=Func,args=As,ret=Rs}, St) ->
+ call_cg(Func, As, Rs, Le, St);
+cg(#k_enter{anno=Le,op=Func,args=As}, St) ->
+ enter_cg(Func, As, Le, St);
+cg(#k_bif{anno=Le}=Bif, St) ->
+ bif_cg(Bif, Le, St);
+cg(#k_try{arg=Ta,vars=Vs,body=Tb,evars=Evs,handler=Th,ret=Rs}, St) ->
+ try_cg(Ta, Vs, Tb, Evs, Th, Rs, St);
+cg(#k_try_enter{arg=Ta,vars=Vs,body=Tb,evars=Evs,handler=Th}, St) ->
+ try_enter_cg(Ta, Vs, Tb, Evs, Th, St);
+cg(#k_catch{body=Cb,ret=[R]}, St) ->
+ do_catch_cg(Cb, R, St);
+cg(#k_receive{anno=Le,timeout=Te,var=Rvar,body=Rm,action=Tes,ret=Rs}, St) ->
+ recv_loop_cg(Te, Rvar, Rm, Tes, Rs, Le, St);
+cg(#k_receive_next{}, #cg{recv=Recv}=St) ->
+ Is = [#b_set{op=recv_next},make_uncond_branch(Recv)],
+ {Is,St};
+cg(#k_receive_accept{}, St) ->
+ Remove = #b_set{op=remove_message},
+ {[Remove],St};
+cg(#k_put{anno=Le,arg=Con,ret=Var}, St) ->
+ put_cg(Var, Con, Le, St);
+cg(#k_return{args=[Ret0]}, St) ->
+ Ret = ssa_arg(Ret0, St),
+ {[#b_ret{arg=Ret}],St};
+cg(#k_break{args=Bs}, #cg{break=Br}=St) ->
+ Args = ssa_args(Bs, St),
+ {[#cg_break{args=Args,phi=Br}],St};
+cg(#k_guard_break{args=Bs}, St) ->
+ cg(#k_break{args=Bs}, St).
+
+%% match_cg(Matc, [Ret], State) -> {[Ainstr],State}.
+%% Generate code for a match.
+
+do_match_cg(M, Rs, St0) ->
+ {B,St1} = new_label(St0),
+ {Mis,St2} = match_cg(M, St1#cg.bfail, St1#cg{break=B}),
+ {BreakVars,St} = new_ssa_vars(Rs, St2),
+ {Mis ++ [{label,B},#cg_phi{vars=BreakVars}],
+ St#cg{bfail=St0#cg.bfail,break=St1#cg.break}}.
+
+%% match_cg(Match, Fail, State) -> {[Ainstr],State}.
+%% Generate code for a match tree.
+
+match_cg(#k_alt{first=F,then=S}, Fail, St0) ->
+ {Tf,St1} = new_label(St0),
+ {Fis,St2} = match_cg(F, Tf, St1),
+ {Sis,St3} = match_cg(S, Fail, St2),
+ {Fis ++ [{label,Tf}] ++ Sis,St3};
+match_cg(#k_select{var=#k_var{}=V,types=Scs}, Fail, St) ->
+ match_fmf(fun (S, F, Sta) ->
+ select_cg(S, V, F, Fail, Sta)
+ end, Fail, St, Scs);
+match_cg(#k_guard{clauses=Gcs}, Fail, St) ->
+ match_fmf(fun (G, F, Sta) ->
+ guard_clause_cg(G, F, Sta)
+ end, Fail, St, Gcs);
+match_cg(Ke, _Fail, St0) ->
+ cg(Ke, St0).
+
+%% select_cg(Sclause, V, TypeFail, ValueFail, State) -> {Is,State}.
+%% Selecting type and value needs two failure labels, TypeFail is the
+%% label to jump to of the next type test when this type fails, and
+%% ValueFail is the label when this type is correct but the value is
+%% wrong. These are different as in the second case there is no need
+%% to try the next type, it will always fail.
+
+select_cg(#k_type_clause{type=k_binary,values=[S]}, Var, Tf, Vf, St) ->
+ select_binary(S, Var, Tf, Vf, St);
+select_cg(#k_type_clause{type=k_bin_seg,values=Vs}, Var, Tf, _Vf, St) ->
+ select_bin_segs(Vs, Var, Tf, St);
+select_cg(#k_type_clause{type=k_bin_int,values=Vs}, Var, Tf, _Vf, St) ->
+ select_bin_segs(Vs, Var, Tf, St);
+select_cg(#k_type_clause{type=k_bin_end,values=[S]}, Var, Tf, _Vf, St) ->
+ select_bin_end(S, Var, Tf, St);
+select_cg(#k_type_clause{type=k_map,values=Vs}, Var, Tf, Vf, St) ->
+ select_map(Vs, Var, Tf, Vf, St);
+select_cg(#k_type_clause{type=k_cons,values=[S]}, Var, Tf, Vf, St) ->
+ select_cons(S, Var, Tf, Vf, St);
+select_cg(#k_type_clause{type=k_nil,values=[S]}, Var, Tf, Vf, St) ->
+ select_nil(S, Var, Tf, Vf, St);
+select_cg(#k_type_clause{type=k_literal,values=Vs}, Var, Tf, Vf, St) ->
+ select_literal(Vs, Var, Tf, Vf, St);
+select_cg(#k_type_clause{type=Type,values=Scs}, Var, Tf, Vf, St0) ->
+ {Vis,St1} =
+ mapfoldl(fun (S, Sta) ->
+ {Val,Is,Stb} = select_val(S, Var, Vf, Sta),
+ {{Is,[Val]},Stb}
+ end, St0, Scs),
+ OptVls = combine(lists:sort(combine(Vis))),
+ {Vls,Sis,St2} = select_labels(OptVls, St1, [], []),
+ Arg = ssa_arg(Var, St2),
+ {Is,St} = select_val_cg(Type, Arg, Vls, Tf, Vf, Sis, St2),
+ {Is,St}.
+
+select_val_cg(k_tuple, Tuple, Vls, Tf, Vf, Sis, St0) ->
+ {Is0,St1} = make_cond_branch({bif,is_tuple}, [Tuple], Tf, St0),
+ {Arity,St2} = new_ssa_var('@ssa_arity', St1),
+ GetArity = #b_set{op={bif,tuple_size},dst=Arity,args=[Tuple]},
+ {Is,St} = select_val_cg(k_int, Arity, Vls, Vf, Vf, Sis, St2),
+ {Is0++[GetArity]++Is,St};
+select_val_cg(Type, R, Vls, Tf, Vf, Sis, St0) ->
+ {TypeIs,St1} = if
+ Tf =:= Vf ->
+ %% The type and value failure labels are the same;
+ %% we don't need a type test.
+ {[],St0};
+ true ->
+ %% Different labels for type failure and
+ %% label failure; we need a type test.
+ Test = select_type_test(Type),
+ make_cond_branch(Test, [R], Tf, St0)
+ end,
+ case Vls of
+ [{Val,Succ}] ->
+ {Is,St} = make_cond({bif,'=:='}, [R,Val], Vf, Succ, St1),
+ {TypeIs++Is++Sis,St};
+ [_|_] ->
+ {TypeIs++[#b_switch{arg=R,fail=Vf,list=Vls}|Sis],St1}
+ end.
+
+select_type_test(k_int) -> {bif,is_integer};
+select_type_test(k_atom) -> {bif,is_atom};
+select_type_test(k_float) -> {bif,is_float}.
+
+combine([{Is,Vs1},{Is,Vs2}|Vis]) -> combine([{Is,Vs1 ++ Vs2}|Vis]);
+combine([V|Vis]) -> [V|combine(Vis)];
+combine([]) -> [].
+
+select_labels([{Is,Vs}|Vis], St0, Vls, Sis) ->
+ {Lbl,St1} = new_label(St0),
+ select_labels(Vis, St1, add_vls(Vs, Lbl, Vls), [[{label,Lbl}|Is]|Sis]);
+select_labels([], St, Vls, Sis) ->
+ {Vls,append(Sis),St}.
+
+add_vls([V|Vs], Lbl, Acc) ->
+ add_vls(Vs, Lbl, [{#b_literal{val=V},Lbl}|Acc]);
+add_vls([], _, Acc) -> Acc.
+
+select_literal(S, V, Tf, Vf, St) ->
+ Src = ssa_arg(V, St),
+ F = fun(ValClause, Fail, St0) ->
+ {Val,ValIs,St1} = select_val(ValClause, V, Vf, St0),
+ Args = [Src,#b_literal{val=Val}],
+ {Is,St2} = make_cond_branch({bif,'=:='}, Args, Fail, St1),
+ {Is++ValIs,St2}
+ end,
+ match_fmf(F, Tf, St, S).
+
+select_cons(#k_val_clause{val=#k_cons{hd=Hd,tl=Tl},body=B},
+ V, Tf, Vf, St0) ->
+ Es = [Hd,Tl],
+ {Eis,St1} = select_extract_cons(V, Es, St0),
+ {Bis,St2} = match_cg(B, Vf, St1),
+ Src = ssa_arg(V, St2),
+ {Is,St} = make_cond_branch(is_nonempty_list, [Src], Tf, St2),
+ {Is ++ Eis ++ Bis,St}.
+
+select_nil(#k_val_clause{val=#k_nil{},body=B}, V, Tf, Vf, St0) ->
+ {Bis,St1} = match_cg(B, Vf, St0),
+ Src = ssa_arg(V, St1),
+ {Is,St} = make_cond_branch({bif,'=:='}, [Src,#b_literal{val=[]}], Tf, St1),
+ {Is ++ Bis,St}.
+
+select_binary(#k_val_clause{val=#k_binary{segs=#k_var{name=Ctx0}},body=B},
+ #k_var{anno=Anno0}=Src, Tf, Vf, St0) ->
+ Anno = #{reuse_for_context=>member(reuse_for_context, Anno0)},
+ {Ctx,St1} = new_ssa_var(Ctx0, St0),
+ {Bis0,St2} = match_cg(B, Vf, St1),
+ {TestIs,St} = make_cond_branch(succeeded, [Ctx], Tf, St2),
+ Bis1 = [#b_set{anno=Anno,op=bs_start_match,dst=Ctx,
+ args=[ssa_arg(Src, St)]}] ++ TestIs ++ Bis0,
+ Bis = finish_bs_matching(Bis1),
+ {Bis,St}.
+
+finish_bs_matching([#b_set{op=bs_match,
+ args=[#b_literal{val=string},Ctx,#b_literal{val=BinList}]}=Set|Is])
+ when is_list(BinList) ->
+ I = Set#b_set{args=[#b_literal{val=string},Ctx,
+ #b_literal{val=list_to_bitstring(BinList)}]},
+ finish_bs_matching([I|Is]);
+finish_bs_matching([I|Is]) ->
+ [I|finish_bs_matching(Is)];
+finish_bs_matching([]) -> [].
+
+make_cond(Cond, Args, Fail, Succ, St0) ->
+ {Bool,St} = new_ssa_var('@ssa_bool', St0),
+ Bif = #b_set{op=Cond,dst=Bool,args=Args},
+ Br = #b_br{bool=Bool,succ=Succ,fail=Fail},
+ {[Bif,Br],St}.
+
+make_cond_branch(Cond, Args, Fail, St0) ->
+ {Bool,St1} = new_ssa_var('@ssa_bool', St0),
+ {Succ,St} = new_label(St1),
+ Bif = #b_set{op=Cond,dst=Bool,args=Args},
+ Br = #b_br{bool=Bool,succ=Succ,fail=Fail},
+ {[Bif,Br,{label,Succ}],St}.
+
+make_uncond_branch(Fail) ->
+ #b_br{bool=#b_literal{val=true},succ=Fail,fail=Fail}.
+
+%% Instructions for selection of binary segments.
+
+select_bin_segs(Scs, Ivar, Tf, St) ->
+ match_fmf(fun(S, Fail, Sta) ->
+ select_bin_seg(S, Ivar, Fail, Sta)
+ end, Tf, St, Scs).
+
+select_bin_seg(#k_val_clause{val=#k_bin_seg{size=Size,unit=U,type=T,
+ seg=Seg,flags=Fs,next=Next},
+ body=B,anno=Anno},
+ #k_var{}=Src, Fail, St0) ->
+ LineAnno = line_anno(Anno),
+ Ctx = get_context(Src, St0),
+ {Mis,St1} = select_extract_bin(Next, Size, U, T, Fs, Fail,
+ Ctx, LineAnno, St0),
+ {Extracted,St2} = new_ssa_var(Seg#k_var.name, St1),
+ {Bis,St} = bin_match_cg(Size, B, Fail, St2),
+ BsGet = #b_set{op=bs_extract,dst=Extracted,args=[ssa_arg(Next, St)]},
+ Is = Mis ++ [BsGet] ++ Bis,
+ {Is,St};
+select_bin_seg(#k_val_clause{val=#k_bin_int{size=Sz,unit=U,flags=Fs,
+ val=Val,next=Next},
+ body=B},
+ #k_var{}=Src, Fail, St0) ->
+ Ctx = get_context(Src, St0),
+ {Mis,St1} = select_extract_int(Next, Val, Sz, U, Fs, Fail,
+ Ctx, St0),
+ {Bis,St} = match_cg(B, Fail, St1),
+ Is = case Mis ++ Bis of
+ [#b_set{op=bs_match,args=[#b_literal{val=string},OtherCtx1,Bin1]},
+ #b_set{op=succeeded,dst=Bool1},
+ #b_br{bool=Bool1,succ=Succ,fail=Fail},
+ {label,Succ},
+ #b_set{op=bs_match,dst=Dst,args=[#b_literal{val=string},_OtherCtx2,Bin2]}|
+ [#b_set{op=succeeded,dst=Bool2},
+ #b_br{bool=Bool2,fail=Fail}|_]=Is0] ->
+ %% We used to do this optimization later, but it
+ %% turns out that in huge functions with many
+ %% string matching instructions, it's a huge win
+ %% to do the combination now. To avoid copying the
+ %% binary data again and again, we'll combine bitstrings
+ %% in a list and convert all of it to a bitstring later.
+ {#b_literal{val=B1},#b_literal{val=B2}} = {Bin1,Bin2},
+ Bin = #b_literal{val=[B1,B2]},
+ Set = #b_set{op=bs_match,dst=Dst,args=[#b_literal{val=string},OtherCtx1,Bin]},
+ [Set|Is0];
+ Is0 ->
+ Is0
+ end,
+ {Is,St}.
+
+bin_match_cg(#k_atom{val=all}, B0, Fail, St) ->
+ #k_select{types=Types} = B0,
+ [#k_type_clause{type=k_bin_end,values=Values}] = Types,
+ [#k_val_clause{val=#k_bin_end{},body=B}] = Values,
+ match_cg(B, Fail, St);
+bin_match_cg(_, B, Fail, St) ->
+ match_cg(B, Fail, St).
+
+get_context(#k_var{}=Var, St) ->
+ ssa_arg(Var, St).
+
+select_bin_end(#k_val_clause{val=#k_bin_end{},body=B}, Src, Tf, St0) ->
+ Ctx = get_context(Src, St0),
+ {Bis,St1} = match_cg(B, Tf, St0),
+ {TestIs,St} = make_cond_branch(bs_test_tail, [Ctx,#b_literal{val=0}], Tf, St1),
+ Is = TestIs++Bis,
+ {Is,St}.
+
+select_extract_bin(#k_var{name=Hd}, Size0, Unit, Type, Flags, Vf,
+ Ctx, Anno, St0) ->
+ {Dst,St1} = new_ssa_var(Hd, St0),
+ Size = ssa_arg(Size0, St0),
+ build_bs_instr(Anno, Type, Vf, Ctx, Size, Unit, Flags, Dst, St1).
+
+select_extract_int(#k_var{name=Tl}, 0, #k_int{val=0}, _U, _Fs, _Vf,
+ Ctx, St0) ->
+ St = set_ssa_var(Tl, Ctx, St0),
+ {[],St};
+select_extract_int(#k_var{name=Tl}, Val, #k_int{val=Sz}, U, Fs, Vf,
+ Ctx, St0) ->
+ {Dst,St1} = new_ssa_var(Tl, St0),
+ Bits = U*Sz,
+ Bin = case member(big, Fs) of
+ true ->
+ <<Val:Bits>>;
+ false ->
+ true = member(little, Fs), %Assertion.
+ <<Val:Bits/little>>
+ end,
+ Bits = bit_size(Bin), %Assertion.
+ {TestIs,St} = make_cond_branch(succeeded, [Dst], Vf, St1),
+ Set = #b_set{op=bs_match,dst=Dst,
+ args=[#b_literal{val=string},Ctx,#b_literal{val=Bin}]},
+ {[Set|TestIs],St}.
+
+build_bs_instr(Anno, Type, Fail, Ctx, Size, Unit0, Flags0, Dst, St0) ->
+ Unit = #b_literal{val=Unit0},
+ Flags = #b_literal{val=Flags0},
+ NeedSize = bs_need_size(Type),
+ TypeArg = #b_literal{val=Type},
+ Get = case NeedSize of
+ true ->
+ #b_set{anno=Anno,op=bs_match,dst=Dst,
+ args=[TypeArg,Ctx,Flags,Size,Unit]};
+ false ->
+ #b_set{anno=Anno,op=bs_match,dst=Dst,
+ args=[TypeArg,Ctx,Flags]}
+ end,
+ {Is,St} = make_cond_branch(succeeded, [Dst], Fail, St0),
+ {[Get|Is],St}.
+
+select_val(#k_val_clause{val=#k_tuple{es=Es},body=B}, V, Vf, St0) ->
+ #k{us=Used} = k_get_anno(B),
+ {Eis,St1} = select_extract_tuple(V, Es, Used, St0),
+ {Bis,St2} = match_cg(B, Vf, St1),
+ {length(Es),Eis ++ Bis,St2};
+select_val(#k_val_clause{val=Val0,body=B}, _V, Vf, St0) ->
+ Val = case Val0 of
+ #k_atom{val=Lit} -> Lit;
+ #k_float{val=Lit} -> Lit;
+ #k_int{val=Lit} -> Lit;
+ #k_literal{val=Lit} -> Lit
+ end,
+ {Bis,St1} = match_cg(B, Vf, St0),
+ {Val,Bis,St1}.
+
+%% select_extract_tuple(Src, [V], State) -> {[E],State}.
+%% Extract tuple elements, but only if they are actually used.
+%%
+%% Not extracting tuple elements that are not used is an
+%% optimization for compile time and memory use during compilation.
+%% It is probably worthwhile because it is common to extract only a
+%% few elements from a huge record.
+
+select_extract_tuple(Src, Vs, Used, St0) ->
+ Tuple = ssa_arg(Src, St0),
+ F = fun (#k_var{name=V}, {Elem,S0}) ->
+ case member(V, Used) of
+ true ->
+ Args = [Tuple,#b_literal{val=Elem}],
+ {Dst,S} = new_ssa_var(V, S0),
+ Get = #b_set{op=get_tuple_element,dst=Dst,args=Args},
+ {[Get],{Elem+1,S}};
+ false ->
+ {[],{Elem+1,S0}}
+ end
+ end,
+ {Es,{_,St}} = flatmapfoldl(F, {0,St0}, Vs),
+ {Es,St}.
+
+select_map(Scs, V, Tf, Vf, St0) ->
+ MapSrc = ssa_arg(V, St0),
+ {Is,St1} =
+ match_fmf(fun(#k_val_clause{val=#k_map{op=exact,es=Es},
+ body=B}, Fail, St1) ->
+ select_map_val(V, Es, B, Fail, St1)
+ end, Vf, St0, Scs),
+ {TestIs,St} = make_cond_branch({bif,is_map}, [MapSrc], Tf, St1),
+ {TestIs++Is,St}.
+
+select_map_val(V, Es, B, Fail, St0) ->
+ {Eis,St1} = select_extract_map(Es, V, Fail, St0),
+ {Bis,St2} = match_cg(B, Fail, St1),
+ {Eis++Bis,St2}.
+
+select_extract_map([P|Ps], Src, Fail, St0) ->
+ MapSrc = ssa_arg(Src, St0),
+ #k_map_pair{key=Key0,val=#k_var{name=Dst0}} = P,
+ Key = ssa_arg(Key0, St0),
+ {Dst,St1} = new_ssa_var(Dst0, St0),
+ Set = #b_set{op=get_map_element,dst=Dst,args=[MapSrc,Key]},
+ {TestIs,St2} = make_cond_branch(succeeded, [Dst], Fail, St1),
+ {Is,St} = select_extract_map(Ps, Src, Fail, St2),
+ {[Set|TestIs]++Is,St};
+select_extract_map([], _, _, St) ->
+ {[],St}.
+
+select_extract_cons(Src0, [#k_var{name=Hd},#k_var{name=Tl}], St0) ->
+ Src = ssa_arg(Src0, St0),
+ {HdDst,St1} = new_ssa_var(Hd, St0),
+ {TlDst,St2} = new_ssa_var(Tl, St1),
+ GetHd = #b_set{op=get_hd,dst=HdDst,args=[Src]},
+ GetTl = #b_set{op=get_tl,dst=TlDst,args=[Src]},
+ {[GetHd,GetTl],St2}.
+
+guard_clause_cg(#k_guard_clause{guard=G,body=B}, Fail, St0) ->
+ {Gis,St1} = guard_cg(G, Fail, St0),
+ {Bis,St} = match_cg(B, Fail, St1),
+ {Gis ++ Bis,St}.
+
+%% guard_cg(Guard, Fail, State) -> {[Ainstr],State}.
+%% A guard is a boolean expression of tests. Tests return true or
+%% false. A fault in a test causes the test to return false. Tests
+%% never return the boolean, instead we generate jump code to go to
+%% the correct exit point. Primops and tests all go to the next
+%% instruction on success or jump to a failure label.
+
+guard_cg(#k_protected{arg=Ts,ret=Rs,inner=Inner}, Fail, St) ->
+ protected_cg(Ts, Rs, Inner, Fail, St);
+guard_cg(#k_test{op=Test0,args=As,inverted=Inverted}, Fail, St0) ->
+ #k_remote{mod=#k_atom{val=erlang},name=#k_atom{val=Test}} = Test0,
+ test_cg(Test, Inverted, As, Fail, St0);
+guard_cg(#k_seq{arg=Arg,body=Body}, Fail, St0) ->
+ {ArgIs,St1} = guard_cg(Arg, Fail, St0),
+ {BodyIs,St} = guard_cg(Body, Fail, St1),
+ {ArgIs++BodyIs,St};
+guard_cg(G, _Fail, St) ->
+ cg(G, St).
+
+test_cg('=/=', Inverted, As, Fail, St) ->
+ test_cg('=:=', not Inverted, As, Fail, St);
+test_cg('/=', Inverted, As, Fail, St) ->
+ test_cg('==', not Inverted, As, Fail, St);
+test_cg(Test, Inverted, As0, Fail, St0) ->
+ As = ssa_args(As0, St0),
+ case {Test,ssa_args(As0, St0)} of
+ {is_record,[Tuple,#b_literal{val=Atom}=Tag,#b_literal{val=Int}=Arity]}
+ when is_atom(Atom), is_integer(Int) ->
+ test_is_record_cg(Inverted, Fail, Tuple, Tag, Arity, St0);
+ {_,As} ->
+ {Bool,St1} = new_ssa_var('@ssa_bool', St0),
+ {Succ,St} = new_label(St1),
+ Bif = #b_set{op={bif,Test},dst=Bool,args=As},
+ Br = case Inverted of
+ false -> #b_br{bool=Bool,succ=Succ,fail=Fail};
+ true -> #b_br{bool=Bool,succ=Fail,fail=Succ}
+ end,
+ {[Bif,Br,{label,Succ}],St}
+ end.
+
+test_is_record_cg(false, Fail, Tuple, TagVal, ArityVal, St0) ->
+ {Arity,St1} = new_ssa_var('@ssa_arity', St0),
+ {Tag,St2} = new_ssa_var('@ssa_tag', St1),
+ {Is0,St3} = make_cond_branch({bif,is_tuple}, [Tuple], Fail, St2),
+ GetArity = #b_set{op={bif,tuple_size},dst=Arity,args=[Tuple]},
+ {Is1,St4} = make_cond_branch({bif,'=:='}, [Arity,ArityVal], Fail, St3),
+ GetTag = #b_set{op=get_tuple_element,dst=Tag,
+ args=[Tuple,#b_literal{val=0}]},
+ {Is2,St} = make_cond_branch({bif,'=:='}, [Tag,TagVal], Fail, St4),
+ Is = Is0 ++ [GetArity] ++ Is1 ++ [GetTag] ++ Is2,
+ {Is,St};
+test_is_record_cg(true, Fail, Tuple, TagVal, ArityVal, St0) ->
+ {Succ,St1} = new_label(St0),
+ {Arity,St2} = new_ssa_var('@ssa_arity', St1),
+ {Tag,St3} = new_ssa_var('@ssa_tag', St2),
+ {Is0,St4} = make_cond_branch({bif,is_tuple}, [Tuple], Succ, St3),
+ GetArity = #b_set{op={bif,tuple_size},dst=Arity,args=[Tuple]},
+ {Is1,St5} = make_cond_branch({bif,'=:='}, [Arity,ArityVal], Succ, St4),
+ GetTag = #b_set{op=get_tuple_element,dst=Tag,
+ args=[Tuple,#b_literal{val=0}]},
+ {Is2,St} = make_cond_branch({bif,'=:='}, [Tag,TagVal], Succ, St5),
+ Is3 = [make_uncond_branch(Fail),{label,Succ}],
+ Is = Is0 ++ [GetArity] ++ Is1 ++ [GetTag] ++ Is2 ++ Is3,
+ {Is,St}.
+
+%% protected_cg([Kexpr], [Ret], Fail, St) -> {[Ainstr],St}.
+%% Do a protected. Protecteds without return values are just done
+%% for effect, the return value is not checked, success passes on to
+%% the next instruction and failure jumps to Fail. If there are
+%% return values then these must be set to 'false' on failure,
+%% control always passes to the next instruction.
+
+protected_cg(Ts, [], _, Fail, St0) ->
+ %% Protect these calls, revert when done.
+ {Tis,St1} = guard_cg(Ts, Fail, St0#cg{bfail=Fail}),
+ {Tis,St1#cg{bfail=St0#cg.bfail}};
+protected_cg(Ts, Rs, Inner0, _Fail, St0) ->
+ {Pfail,St1} = new_label(St0),
+ {Br,St2} = new_label(St1),
+ Prot = duplicate(length(Rs), #b_literal{val=false}),
+ {Tis,St3} = guard_cg(Ts, Pfail, St2#cg{break=Pfail,bfail=Pfail}),
+ Inner = ssa_args(Inner0, St3),
+ {BreakVars,St} = new_ssa_vars(Rs, St3),
+ Is = Tis ++ [#cg_break{args=Inner,phi=Br},
+ {label,Pfail},#cg_break{args=Prot,phi=Br},
+ {label,Br},#cg_phi{vars=BreakVars}],
+ {Is,St#cg{break=St0#cg.break,bfail=St0#cg.bfail}}.
+
+%% match_fmf(Fun, LastFail, State, [Clause]) -> {Is,State}.
+%% This is a special flatmapfoldl for match code gen where we
+%% generate a "failure" label for each clause. The last clause uses
+%% an externally generated failure label, LastFail. N.B. We do not
+%% know or care how the failure labels are used.
+
+match_fmf(F, LastFail, St, [H]) ->
+ F(H, LastFail, St);
+match_fmf(F, LastFail, St0, [H|T]) ->
+ {Fail,St1} = new_label(St0),
+ {R,St2} = F(H, Fail, St1),
+ {Rs,St3} = match_fmf(F, LastFail, St2, T),
+ {R ++ [{label,Fail}] ++ Rs,St3}.
+
+%% fail_label(State) -> {Where,FailureLabel}.
+%% Where = guard | no_catch | in_catch
+%% Return an indication of which part of a function code is
+%% being generated for and the appropriate failure label to
+%% use.
+%%
+%% Where has the following meaning:
+%%
+%% guard - Inside a guard.
+%% no_catch - In a function body, not in the scope of
+%% a try/catch or catch.
+%% in_catch - In the scope of a try/catch or catch.
+
+fail_label(#cg{catch_label=Catch,bfail=Fail,ultimate_failure=Ult}) ->
+ if
+ Fail =/= Ult ->
+ {guard,Fail};
+ Catch =:= none ->
+ {no_catch,Fail};
+ is_integer(Catch) ->
+ {in_catch,Catch}
+ end.
+
+%% bif_fail_label(State) -> FailureLabel.
+%% Return the appropriate failure label for a guard BIF call or
+%% primop that fails.
+
+bif_fail_label(St) ->
+ {_,Fail} = fail_label(St),
+ Fail.
+
+%% call_cg(Func, [Arg], [Ret], Le, State) ->
+%% {[Ainstr],State}.
+%% enter_cg(Func, [Arg], Le, St) -> {[Ainstr],St}.
+%% Generate code for call and enter.
+
+call_cg(Func, As, [], Le, St) ->
+ call_cg(Func, As, [#k_var{name='@ssa_ignored'}], Le, St);
+call_cg(Func0, As, [#k_var{name=R}|MoreRs]=Rs, Le, St0) ->
+ case fail_label(St0) of
+ {guard,Fail} ->
+ %% Inside a guard. The only allowed function call is to
+ %% erlang:error/1,2. We will generate a branch to the
+ %% failure branch.
+ #k_remote{mod=#k_atom{val=erlang},
+ name=#k_atom{val=error}} = Func0, %Assertion.
+ [#k_var{name=DestVar}] = Rs,
+ St = set_ssa_var(DestVar, #b_literal{val=unused}, St0),
+ {[make_uncond_branch(Fail),#cg_unreachable{}],St};
+ {Catch,Fail} ->
+ %% Ordinary function call in a function body.
+ Args = ssa_args(As, St0),
+ {Ret,St1} = new_ssa_var(R, St0),
+ Func = call_target(Func0, Args, St0),
+ Call = #b_set{anno=line_anno(Le),op=call,dst=Ret,args=[Func|Args]},
+
+ %% If this is a call to erlang:error(), MoreRs could be a
+ %% nonempty list of variables that each need a value.
+ St2 = foldl(fun(#k_var{name=Dummy}, S) ->
+ set_ssa_var(Dummy, #b_literal{val=unused}, S)
+ end, St1, MoreRs),
+ case Catch of
+ no_catch ->
+ {[Call],St2};
+ in_catch ->
+ {TestIs,St} = make_cond_branch(succeeded, [Ret], Fail, St2),
+ {[Call|TestIs],St}
+ end
+ end.
+
+enter_cg(Func0, As0, Le, St0) ->
+ Anno = line_anno(Le),
+ Func = call_target(Func0, As0, St0),
+ As = ssa_args(As0, St0),
+ {Ret,St} = new_ssa_var('@ssa_ret', St0),
+ Call = #b_set{anno=Anno,op=call,dst=Ret,args=[Func|As]},
+ {[Call,#b_ret{arg=Ret}],St}.
+
+call_target(Func, As, St) ->
+ Arity = length(As),
+ case Func of
+ #k_remote{mod=Mod0,name=Name0} ->
+ Mod = ssa_arg(Mod0, St),
+ Name = ssa_arg(Name0, St),
+ #b_remote{mod=Mod,name=Name,arity=Arity};
+ #k_local{name=Name} when is_atom(Name) ->
+ #b_local{name=#b_literal{val=Name},arity=Arity};
+ #k_var{}=Var ->
+ ssa_arg(Var, St)
+ end.
+
+%% bif_cg(#k_bif{}, Le,State) -> {[Ainstr],State}.
+%% Generate code for a guard BIF or primop.
+
+bif_cg(#k_bif{op=#k_internal{name=Name},args=As,ret=Rs}, Le, St) ->
+ internal_cg(Name, As, Rs, Le, St);
+bif_cg(#k_bif{op=#k_remote{mod=#k_atom{val=erlang},name=#k_atom{val=Name}},
+ args=As,ret=Rs}, Le, St) ->
+ bif_cg(Name, As, Rs, Le, St).
+
+%% internal_cg(Bif, [Arg], [Ret], Le, State) ->
+%% {[Ainstr],State}.
+
+internal_cg(bs_context_to_binary, [Src0], [], _Le, St) ->
+ Src = ssa_arg(Src0, St),
+ Set = #b_set{op=context_to_binary,args=[Src]},
+ {[Set],St};
+internal_cg(dsetelement, [Index0,Tuple0,New0], _Rs, _Le, St) ->
+ [New,Tuple,#b_literal{val=Index1}] = ssa_args([New0,Tuple0,Index0], St),
+ Index = #b_literal{val=Index1-1},
+ Set = #b_set{op=set_tuple_element,args=[New,Tuple,Index]},
+ {[Set],St};
+internal_cg(make_fun, [Name0,Arity0|As], Rs, _Le, St0) ->
+ #k_atom{val=Name} = Name0,
+ #k_int{val=Arity} = Arity0,
+ [#k_var{name=Dst0}] = Rs,
+ {Dst,St} = new_ssa_var(Dst0, St0),
+ Args = ssa_args(As, St),
+ Local = #b_local{name=#b_literal{val=Name},arity=Arity},
+ MakeFun = #b_set{op=make_fun,dst=Dst,args=[Local|Args]},
+ {[MakeFun],St};
+internal_cg(bs_init_writable=I, As, [#k_var{name=Dst0}], _Le, St0) ->
+ %% This behaves like a function call.
+ {Dst,St} = new_ssa_var(Dst0, St0),
+ Args = ssa_args(As, St),
+ Set = #b_set{op=I,dst=Dst,args=Args},
+ {[Set],St};
+internal_cg(build_stacktrace=I, As, [#k_var{name=Dst0}], _Le, St0) ->
+ {Dst,St} = new_ssa_var(Dst0, St0),
+ Args = ssa_args(As, St),
+ Set = #b_set{op=I,dst=Dst,args=Args},
+ {[Set],St};
+internal_cg(raise, As, [#k_var{name=Dst0}], _Le, St0) ->
+ Args = ssa_args(As, St0),
+ {Dst,St} = new_ssa_var(Dst0, St0),
+ Resume = #b_set{op=resume,dst=Dst,args=Args},
+ case St of
+ #cg{catch_label=none} ->
+ {[Resume],St};
+ #cg{catch_label=Catch} when is_integer(Catch) ->
+ Is = [Resume,make_uncond_branch(Catch),#cg_unreachable{}],
+ {Is,St}
+ end;
+internal_cg(raw_raise=I, As, [#k_var{name=Dst0}], _Le, St0) ->
+ %% This behaves like a function call.
+ {Dst,St} = new_ssa_var(Dst0, St0),
+ Args = ssa_args(As, St),
+ Set = #b_set{op=I,dst=Dst,args=Args},
+ {[Set],St}.
+
+bif_cg(Bif, As0, [#k_var{name=Dst0}], Le, St0) ->
+ {Dst,St1} = new_ssa_var(Dst0, St0),
+ case {Bif,ssa_args(As0, St0)} of
+ {is_record,[Tuple,#b_literal{val=Atom}=Tag,
+ #b_literal{val=Int}=Arity]}
+ when is_atom(Atom), is_integer(Int) ->
+ bif_is_record_cg(Dst, Tuple, Tag, Arity, St1);
+ {_,As} ->
+ I = #b_set{anno=line_anno(Le),op={bif,Bif},dst=Dst,args=As},
+ case erl_bifs:is_safe(erlang, Bif, length(As)) of
+ false ->
+ Fail = bif_fail_label(St1),
+ {Is,St} = make_cond_branch(succeeded, [Dst], Fail, St1),
+ {[I|Is],St};
+ true->
+ {[I],St1}
+ end
+ end.
+
+bif_is_record_cg(Dst, Tuple, TagVal, ArityVal, St0) ->
+ {Arity,St1} = new_ssa_var('@ssa_arity', St0),
+ {Tag,St2} = new_ssa_var('@ssa_tag', St1),
+ {Phi,St3} = new_label(St2),
+ {False,St4} = new_label(St3),
+ {Is0,St5} = make_cond_branch({bif,is_tuple}, [Tuple], False, St4),
+ GetArity = #b_set{op={bif,tuple_size},dst=Arity,args=[Tuple]},
+ {Is1,St6} = make_cond_branch({bif,'=:='}, [Arity,ArityVal], False, St5),
+ GetTag = #b_set{op=get_tuple_element,dst=Tag,
+ args=[Tuple,#b_literal{val=0}]},
+ {Is2,St} = make_cond_branch({bif,'=:='}, [Tag,TagVal], False, St6),
+ Is3 = [#cg_break{args=[#b_literal{val=true}],phi=Phi},
+ {label,False},
+ #cg_break{args=[#b_literal{val=false}],phi=Phi},
+ {label,Phi},
+ #cg_phi{vars=[Dst]}],
+ Is = Is0 ++ [GetArity] ++ Is1 ++ [GetTag] ++ Is2 ++ Is3,
+ {Is,St}.
+
+%% recv_loop_cg(TimeOut, ReceiveVar, ReceiveMatch, TimeOutExprs,
+%% [Ret], Le, St) -> {[Ainstr],St}.
+
+recv_loop_cg(Te, Rvar, Rm, Tes, Rs, Le, St0) ->
+ %% Get labels.
+ {Rl,St1} = new_label(St0),
+ {Tl,St2} = new_label(St1),
+ {Bl,St3} = new_label(St2),
+ St4 = St3#cg{break=Bl,recv=Rl},
+ {Ris,St5} = cg_recv_mesg(Rvar, Rm, Tl, Le, St4),
+ {Wis,St6} = cg_recv_wait(Te, Tes, St5),
+ {BreakVars,St} = new_ssa_vars(Rs, St6),
+ {Ris ++ [{label,Tl}] ++ Wis ++
+ [{label,Bl},#cg_phi{vars=BreakVars}],
+ St#cg{break=St0#cg.break,recv=St0#cg.recv}}.
+
+%% cg_recv_mesg( ) -> {[Ainstr],St}.
+
+cg_recv_mesg(#k_var{name=R}, Rm, Tl, Le, St0) ->
+ {Dst,St1} = new_ssa_var(R, St0),
+ {Mis,St2} = match_cg(Rm, none, St1),
+ RecvLbl = St1#cg.recv,
+ {TestIs,St} = make_cond_branch(succeeded, [Dst], Tl, St2),
+ Is = [#b_br{anno=line_anno(Le),bool=#b_literal{val=true},
+ succ=RecvLbl,fail=RecvLbl},
+ {label,RecvLbl},
+ #b_set{op=peek_message,dst=Dst}|TestIs],
+ {Is++Mis,St}.
+
+%% cg_recv_wait(Te, Tes, St) -> {[Ainstr],St}.
+
+cg_recv_wait(#k_int{val=0}, Es, St0) ->
+ {Tis,St} = cg(Es, St0),
+ {[#b_set{op=timeout}|Tis],St};
+cg_recv_wait(Te, Es, St0) ->
+ {Tis,St1} = cg(Es, St0),
+ Args = [ssa_arg(Te, St1)],
+ {WaitDst,St2} = new_ssa_var('@ssa_wait', St1),
+ {WaitIs,St} = make_cond_branch(succeeded, [WaitDst], St1#cg.recv, St2),
+ %% Infinite timeout will be optimized later.
+ Is = [#b_set{op=wait_timeout,dst=WaitDst,args=Args}] ++ WaitIs ++
+ [#b_set{op=timeout}] ++ Tis,
+ {Is,St}.
+
+%% try_cg(TryBlock, [BodyVar], TryBody, [ExcpVar], TryHandler, [Ret], St) ->
+%% {[Ainstr],St}.
+
+try_cg(Ta, Vs, Tb, Evs, Th, Rs, St0) ->
+ {B,St1} = new_label(St0), %Body label
+ {H,St2} = new_label(St1), %Handler label
+ {E,St3} = new_label(St2), %End label
+ {Next,St4} = new_label(St3),
+ {TryTag,St5} = new_ssa_var('@ssa_catch_tag', St4),
+ {SsaVs,St6} = new_ssa_vars(Vs, St5),
+ {SsaEvs,St7} = new_ssa_vars(Evs, St6),
+ {Ais,St8} = cg(Ta, St7#cg{break=B,catch_label=H}),
+ St9 = St8#cg{break=E,catch_label=St7#cg.catch_label},
+ {Bis,St10} = cg(Tb, St9),
+ {His,St11} = cg(Th, St10),
+ {BreakVars,St12} = new_ssa_vars(Rs, St11),
+ {CatchedAgg,St} = new_ssa_var('@ssa_agg', St12),
+ ExtractVs = extract_vars(SsaEvs, CatchedAgg, 0),
+ KillTryTag = #b_set{op=kill_try_tag,args=[TryTag]},
+ Args = [#b_literal{val='try'},TryTag],
+ Handler = [{label,H},
+ #b_set{op=landingpad,dst=CatchedAgg,args=Args}] ++
+ ExtractVs ++ [KillTryTag],
+ {[#b_set{op=new_try_tag,dst=TryTag,args=[#b_literal{val='try'}]},
+ #b_br{bool=TryTag,succ=Next,fail=H},
+ {label,Next}] ++ Ais ++
+ [{label,B},#cg_phi{vars=SsaVs},KillTryTag] ++ Bis ++
+ Handler ++ His ++
+ [{label,E},#cg_phi{vars=BreakVars}],
+ St#cg{break=St0#cg.break}}.
+
+try_enter_cg(Ta, Vs, Tb, Evs, Th, St0) ->
+ {B,St1} = new_label(St0), %Body label
+ {H,St2} = new_label(St1), %Handler label
+ {Next,St3} = new_label(St2),
+ {TryTag,St4} = new_ssa_var('@ssa_catch_tag', St3),
+ {SsaVs,St5} = new_ssa_vars(Vs, St4),
+ {SsaEvs,St6} = new_ssa_vars(Evs, St5),
+ {Ais,St7} = cg(Ta, St6#cg{break=B,catch_label=H}),
+ St8 = St7#cg{catch_label=St6#cg.catch_label},
+ {Bis,St9} = cg(Tb, St8),
+ {His,St10} = cg(Th, St9),
+ {CatchedAgg,St} = new_ssa_var('@ssa_agg', St10),
+ ExtractVs = extract_vars(SsaEvs, CatchedAgg, 0),
+ KillTryTag = #b_set{op=kill_try_tag,args=[TryTag]},
+ Args = [#b_literal{val='try'},TryTag],
+ Handler = [{label,H},
+ #b_set{op=landingpad,dst=CatchedAgg,args=Args}] ++
+ ExtractVs ++ [KillTryTag],
+ {[#b_set{op=new_try_tag,dst=TryTag,args=[#b_literal{val='try'}]},
+ #b_br{bool=TryTag,succ=Next,fail=H},
+ {label,Next}] ++ Ais ++
+ [{label,B},#cg_phi{vars=SsaVs},KillTryTag] ++ Bis ++
+ Handler ++ His,
+ St#cg{break=St0#cg.break}}.
+
+extract_vars([V|Vs], Agg, N) ->
+ I = #b_set{op=extract,dst=V,args=[Agg,#b_literal{val=N}]},
+ [I|extract_vars(Vs, Agg, N+1)];
+extract_vars([], _, _) -> [].
+
+%% do_catch_cg(CatchBlock, Ret, St) -> {[Ainstr],St}.
+
+do_catch_cg(Block, #k_var{name=R}, St0) ->
+ {B,St1} = new_label(St0),
+ {Next,St2} = new_label(St1),
+ {H,St3} = new_label(St2),
+ {CatchReg,St4} = new_ssa_var('@ssa_catch_tag', St3),
+ {Dst,St5} = new_ssa_var(R, St4),
+ {Succ,St6} = new_label(St5),
+ {Cis,St7} = cg(Block, St6#cg{break=Succ,catch_label=H}),
+ {CatchedVal,St8} = new_ssa_var('@catched_val', St7),
+ {SuccVal,St9} = new_ssa_var('@success_val', St8),
+ {CatchedAgg,St10} = new_ssa_var('@ssa_agg', St9),
+ {CatchEndVal,St} = new_ssa_var('@catch_end_val', St10),
+ Args = [#b_literal{val='catch'},CatchReg],
+ {[#b_set{op=new_try_tag,dst=CatchReg,args=[#b_literal{val='catch'}]},
+ #b_br{bool=CatchReg,succ=Next,fail=H},
+ {label,Next}] ++ Cis ++
+ [{label,H},
+ #b_set{op=landingpad,dst=CatchedAgg,args=Args},
+ #b_set{op=extract,dst=CatchedVal,
+ args=[CatchedAgg,#b_literal{val=0}]},
+ #cg_break{args=[CatchedVal],phi=B},
+ {label,Succ},
+ #cg_phi{vars=[SuccVal]},
+ #cg_break{args=[SuccVal],phi=B},
+ {label,B},#cg_phi{vars=[CatchEndVal]},
+ #b_set{op=catch_end,dst=Dst,args=[CatchReg,CatchEndVal]}],
+ St#cg{break=St1#cg.break,catch_label=St1#cg.catch_label}}.
+
+%% put_cg([Var], Constr, Le, Vdb, Bef, St) -> {[Ainstr],St}.
+%% Generate code for constructing terms.
+
+put_cg([#k_var{name=R}], #k_cons{hd=Hd,tl=Tl}, _Le, St0) ->
+ Args = ssa_args([Hd,Tl], St0),
+ {Dst,St} = new_ssa_var(R, St0),
+ PutList = #b_set{op=put_list,dst=Dst,args=Args},
+ {[PutList],St};
+put_cg([#k_var{name=R}], #k_tuple{es=Es}, _Le, St0) ->
+ {Ret,St} = new_ssa_var(R, St0),
+ Args = ssa_args(Es, St),
+ PutTuple = #b_set{op=put_tuple,dst=Ret,args=Args},
+ {[PutTuple],St};
+put_cg([#k_var{name=R}], #k_binary{segs=Segs}, Le, St0) ->
+ Fail = bif_fail_label(St0),
+ {Dst,St1} = new_ssa_var(R, St0),
+ cg_binary(Dst, Segs, Fail, Le, St1);
+put_cg([#k_var{name=R}], #k_map{op=Op,var=Map,
+ es=[#k_map_pair{key=#k_var{}=K,val=V}]},
+ Le, St0) ->
+ %% Map: single variable key.
+ SrcMap = ssa_arg(Map, St0),
+ LineAnno = line_anno(Le),
+ List = [ssa_arg(K, St0),ssa_arg(V, St0)],
+ {Dst,St1} = new_ssa_var(R, St0),
+ {Is,St} = put_cg_map(LineAnno, Op, SrcMap, Dst, List, St1),
+ {Is,St};
+put_cg([#k_var{name=R}], #k_map{op=Op,var=Map,es=Es}, Le, St0) ->
+ %% Map: one or more literal keys.
+ [] = [Var || #k_map_pair{key=#k_var{}=Var} <- Es], %Assertion
+ SrcMap = ssa_arg(Map, St0),
+ LineAnno = line_anno(Le),
+ List = flatmap(fun(#k_map_pair{key=K,val=V}) ->
+ [ssa_arg(K, St0),ssa_arg(V, St0)]
+ end, Es),
+ {Dst,St1} = new_ssa_var(R, St0),
+ {Is,St} = put_cg_map(LineAnno, Op, SrcMap, Dst, List, St1),
+ {Is,St};
+put_cg([#k_var{name=R}], Con0, _Le, St0) ->
+ %% Create an alias for a variable or literal.
+ Con = ssa_arg(Con0, St0),
+ St = set_ssa_var(R, Con, St0),
+ {[],St}.
+
+put_cg_map(LineAnno, Op, SrcMap, Dst, List, St0) ->
+ Fail = bif_fail_label(St0),
+ Args = [#b_literal{val=Op},SrcMap|List],
+ PutMap = #b_set{anno=LineAnno,op=put_map,dst=Dst,args=Args},
+ if
+ Op =:= assoc ->
+ {[PutMap],St0};
+ true ->
+ {Is,St} = make_cond_branch(succeeded, [Dst], Fail, St0),
+ {[PutMap|Is],St}
+ end.
+
+%%%
+%%% Code generation for constructing binaries.
+%%%
+
+cg_binary(Dst, Segs0, Fail, Le, St0) ->
+ {PutCode0,SzCalc0,St1} = cg_bin_put(Segs0, Fail, St0),
+ LineAnno = line_anno(Le),
+ Anno = Le#k.a,
+ case PutCode0 of
+ [#b_set{op=bs_put,dst=Bool,args=[_,_,Src,#b_literal{val=all}|_]},
+ #b_br{bool=Bool},
+ {label,_}|_] ->
+ #k_bin_seg{unit=Unit0,next=Segs} = Segs0,
+ Unit = #b_literal{val=Unit0},
+ {PutCode,SzCalc1,St2} = cg_bin_put(Segs, Fail, St1),
+ {_,SzVar,SzCode0,St3} = cg_size_calc(1, SzCalc1, Fail, St2),
+ SzCode = cg_bin_anno(SzCode0, LineAnno),
+ Args = case member(single_use, Anno) of
+ true ->
+ [#b_literal{val=private_append},Src,SzVar,Unit];
+ false ->
+ [#b_literal{val=append},Src,SzVar,Unit]
+ end,
+ BsInit = #b_set{anno=LineAnno,op=bs_init,dst=Dst,args=Args},
+ {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St3),
+ {SzCode ++ [BsInit] ++ TestIs ++ PutCode,St};
+ [#b_set{op=bs_put}|_] ->
+ {Unit,SzVar,SzCode0,St2} = cg_size_calc(8, SzCalc0, Fail, St1),
+ SzCode = cg_bin_anno(SzCode0, LineAnno),
+ Args = [#b_literal{val=new},SzVar,Unit],
+ BsInit = #b_set{anno=LineAnno,op=bs_init,dst=Dst,args=Args},
+ {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St2),
+ {SzCode ++ [BsInit] ++ TestIs ++ PutCode0,St}
+ end.
+
+cg_bin_anno([Set|Sets], Anno) ->
+ [Set#b_set{anno=Anno}|Sets];
+cg_bin_anno([], _) -> [].
+
+%% cg_size_calc(PreferredUnit, SzCalc, Fail, St0) ->
+%% {ActualUnit,SizeVariable,SizeCode,St}.
+%% Generate size calculation code.
+
+cg_size_calc(Unit, error, _Fail, St) ->
+ {#b_literal{val=Unit},#b_literal{val=badarg},[],St};
+cg_size_calc(8, [{1,_}|_]=SzCalc, Fail, St) ->
+ cg_size_calc(1, SzCalc, Fail, St);
+cg_size_calc(8, SzCalc, Fail, St0) ->
+ {Var,Pre,St} = cg_size_calc_1(SzCalc, Fail, St0),
+ {#b_literal{val=8},Var,Pre,St};
+cg_size_calc(1, SzCalc0, Fail, St0) ->
+ SzCalc = map(fun({8,#b_literal{val=Size}}) ->
+ {1,#b_literal{val=8*Size}};
+ ({8,{{bif,byte_size},Src}}) ->
+ {1,{{bif,bit_size},Src}};
+ ({8,{_,_}=UtfCalc}) ->
+ {1,{'*',#b_literal{val=8},UtfCalc}};
+ ({_,_}=Pair) ->
+ Pair
+ end, SzCalc0),
+ {Var,Pre,St} = cg_size_calc_1(SzCalc, Fail, St0),
+ {#b_literal{val=1},Var,Pre,St}.
+
+cg_size_calc_1(SzCalc, Fail, St0) ->
+ cg_size_calc_2(SzCalc, #b_literal{val=0}, Fail, St0).
+
+cg_size_calc_2([{_,{'*',Unit,{_,_}=Bif}}|T], Sum0, Fail, St0) ->
+ {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0),
+ {BifDst,Pre1,St2} = cg_size_bif(Bif, Fail, St1),
+ {Sum,Pre2,St} = cg_size_add(Sum1, BifDst, Unit, Fail, St2),
+ {Sum,Pre0++Pre1++Pre2,St};
+cg_size_calc_2([{_,#b_literal{}=Sz}|T], Sum0, Fail, St0) ->
+ {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0),
+ {Sum,Pre,St} = cg_size_add(Sum1, Sz, #b_literal{val=1}, Fail, St1),
+ {Sum,Pre0++Pre,St};
+cg_size_calc_2([{_,#b_var{}=Sz}|T], Sum0, Fail, St0) ->
+ {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0),
+ {Sum,Pre,St} = cg_size_add(Sum1, Sz, #b_literal{val=1}, Fail, St1),
+ {Sum,Pre0++Pre,St};
+cg_size_calc_2([{_,{_,_}=Bif}|T], Sum0, Fail, St0) ->
+ {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, Fail, St0),
+ {BifDst,Pre1,St2} = cg_size_bif(Bif, Fail, St1),
+ {Sum,Pre2,St} = cg_size_add(Sum1, BifDst, #b_literal{val=1}, Fail, St2),
+ {Sum,Pre0++Pre1++Pre2,St};
+cg_size_calc_2([], Sum, _Fail, St) ->
+ {Sum,[],St}.
+
+cg_size_bif(#b_var{}=Var, _Fail, St) ->
+ {Var,[],St};
+cg_size_bif({Name,Src}, Fail, St0) ->
+ {Dst,St1} = new_ssa_var('@ssa_bif', St0),
+ Bif = #b_set{op=Name,dst=Dst,args=[Src]},
+ {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St1),
+ {Dst,[Bif|TestIs],St}.
+
+cg_size_add(#b_literal{val=0}, Val, #b_literal{val=1}, _Fail, St) ->
+ {Val,[],St};
+cg_size_add(A, B, Unit, Fail, St0) ->
+ {Dst,St1} = new_ssa_var('@ssa_sum', St0),
+ {TestIs,St} = make_cond_branch(succeeded, [Dst], Fail, St1),
+ BsAdd = #b_set{op=bs_add,dst=Dst,args=[A,B,Unit]},
+ {Dst,[BsAdd|TestIs],St}.
+
+cg_bin_put(Seg, Fail, St) ->
+ cg_bin_put_1(Seg, Fail, [], [], St).
+
+cg_bin_put_1(#k_bin_seg{size=Size0,unit=U,type=T,flags=Fs,seg=Src0,next=Next},
+ Fail, Acc, SzCalcAcc, St0) ->
+ [Src,Size] = ssa_args([Src0,Size0], St0),
+ NeedSize = bs_need_size(T),
+ TypeArg = #b_literal{val=T},
+ Flags = #b_literal{val=Fs},
+ Unit = #b_literal{val=U},
+ Args = case NeedSize of
+ true -> [TypeArg,Flags,Src,Size,Unit];
+ false -> [TypeArg,Flags,Src]
+ end,
+ {Is,St} = make_cond_branch(bs_put, Args, Fail, St0),
+ SzCalc = bin_size_calc(T, Src, Size, U),
+ cg_bin_put_1(Next, Fail, reverse(Is, Acc), [SzCalc|SzCalcAcc], St);
+cg_bin_put_1(#k_bin_end{}, _, Acc, SzCalcAcc, St) ->
+ SzCalc = fold_size_calc(SzCalcAcc, 0, []),
+ {reverse(Acc),SzCalc,St}.
+
+bs_need_size(utf8) -> false;
+bs_need_size(utf16) -> false;
+bs_need_size(utf32) -> false;
+bs_need_size(_) -> true.
+
+bin_size_calc(utf8, Src, _Size, _Unit) ->
+ {8,{bs_utf8_size,Src}};
+bin_size_calc(utf16, Src, _Size, _Unit) ->
+ {8,{bs_utf16_size,Src}};
+bin_size_calc(utf32, _Src, _Size, _Unit) ->
+ {8,#b_literal{val=4}};
+bin_size_calc(binary, Src, #b_literal{val=all}, Unit) ->
+ case Unit rem 8 of
+ 0 -> {8,{{bif,byte_size},Src}};
+ _ -> {1,{{bif,bit_size},Src}}
+ end;
+bin_size_calc(_Type, _Src, Size, Unit) ->
+ {Unit,Size}.
+
+fold_size_calc([{Unit,#b_literal{val=Size}}|T], Bits, Acc) ->
+ if
+ is_integer(Size) ->
+ fold_size_calc(T, Bits + Unit*Size, Acc);
+ true ->
+ error
+ end;
+fold_size_calc([{U,#b_var{}}=H|T], Bits, Acc) when U =:= 1; U =:= 8 ->
+ fold_size_calc(T, Bits, [H|Acc]);
+fold_size_calc([{U,#b_var{}=Var}|T], Bits, Acc) ->
+ fold_size_calc(T, Bits, [{1,{'*',#b_literal{val=U},Var}}|Acc]);
+fold_size_calc([{_,_}=H|T], Bits, Acc) ->
+ fold_size_calc(T, Bits, [H|Acc]);
+fold_size_calc([], Bits, Acc) ->
+ Bytes = Bits div 8,
+ RemBits = Bits rem 8,
+ Sizes = sort([{1,#b_literal{val=RemBits}},{8,#b_literal{val=Bytes}}|Acc]),
+ [Pair || {_,Sz}=Pair <- Sizes, Sz =/= #b_literal{val=0}].
+
+%%%
+%%% Utilities for creating the SSA types.
+%%%
+
+ssa_args(As, St) ->
+ [ssa_arg(A, St) || A <- As].
+
+ssa_arg(#k_var{name=V}, #cg{vars=Vars}) -> maps:get(V, Vars);
+ssa_arg(#k_literal{val=V}, _) -> #b_literal{val=V};
+ssa_arg(#k_atom{val=V}, _) -> #b_literal{val=V};
+ssa_arg(#k_float{val=V}, _) -> #b_literal{val=V};
+ssa_arg(#k_int{val=V}, _) -> #b_literal{val=V};
+ssa_arg(#k_nil{}, _) -> #b_literal{val=[]}.
+
+new_ssa_vars(Vs, St) ->
+ mapfoldl(fun(#k_var{name=V}, S) ->
+ new_ssa_var(V, S)
+ end, St, Vs).
+
+new_ssa_var(VarBase, #cg{lcount=Uniq,vars=Vars}=St0)
+ when is_atom(VarBase); is_integer(VarBase) ->
+ case Vars of
+ #{VarBase:=_} ->
+ Var = #b_var{name={VarBase,Uniq}},
+ St = St0#cg{lcount=Uniq+1,vars=Vars#{VarBase=>Var}},
+ {Var,St};
+ #{} ->
+ Var = #b_var{name=VarBase},
+ St = St0#cg{vars=Vars#{VarBase=>Var}},
+ {Var,St}
+ end.
+
+set_ssa_var(VarBase, Val, #cg{vars=Vars}=St)
+ when is_atom(VarBase); is_integer(VarBase) ->
+ St#cg{vars=Vars#{VarBase=>Val}}.
+
+%% new_label(St) -> {L,St}.
+
+new_label(#cg{lcount=Next}=St) ->
+ {Next,St#cg{lcount=Next+1}}.
+
+%% line_anno(Le) -> #{} | #{location:={File,Line}}.
+%% Create a location annotation, containing information about the
+%% current filename and line number. The annotation should be
+%% included in any operation that could cause an exception.
+
+line_anno(#k{a=Anno}) ->
+ line_anno_1(Anno).
+
+line_anno_1([Line,{file,Name}]) when is_integer(Line) ->
+ line_anno_2(Name, Line);
+line_anno_1([_|_]=A) ->
+ {Name,Line} = find_loc(A, no_file, 0),
+ line_anno_2(Name, Line);
+line_anno_1([]) ->
+ #{}.
+
+line_anno_2(no_file, _) ->
+ #{};
+line_anno_2(_, 0) ->
+ %% Missing line number or line number 0.
+ #{};
+line_anno_2(Name, Line) ->
+ #{location=>{Name,Line}}.
+
+find_loc([Line|T], File, _) when is_integer(Line) ->
+ find_loc(T, File, Line);
+find_loc([{file,File}|T], _, Line) ->
+ find_loc(T, File, Line);
+find_loc([_|T], File, Line) ->
+ find_loc(T, File, Line);
+find_loc([], File, Line) -> {File,Line}.
+
+flatmapfoldl(F, Accu0, [Hd|Tail]) ->
+ {R,Accu1} = F(Hd, Accu0),
+ {Rs,Accu2} = flatmapfoldl(F, Accu1, Tail),
+ {R++Rs,Accu2};
+flatmapfoldl(_, Accu, []) -> {[],Accu}.
+
+%%%
+%%% Finalize the code.
+%%%
+
+finalize(Asm0, St0) ->
+ Asm1 = fix_phis(Asm0),
+ {Asm,St} = fix_sets(Asm1, [], St0),
+ {build_map(Asm),St}.
+
+fix_phis(Is) ->
+ fix_phis_1(Is, none, #{}).
+
+fix_phis_1([{label,L},#cg_phi{vars=[]}=Phi|Is0], _Lbl, Map0) ->
+ case maps:is_key(L, Map0) of
+ false ->
+ %% No #cg_break{} references this label. Nothing else can
+ %% reference it, so it can be safely be removed.
+ {Is,Map} = drop_upto_label(Is0, Map0),
+ fix_phis_1(Is, none, Map);
+ true ->
+ %% There is a break referencing this label; probably caused
+ %% by a try/catch whose return value is ignored.
+ [{label,L}|fix_phis_1([Phi|Is0], L, Map0)]
+ end;
+fix_phis_1([{label,L}=I|Is], _Lbl, Map) ->
+ [I|fix_phis_1(Is, L, Map)];
+fix_phis_1([#cg_unreachable{}|Is0], _Lbl, Map0) ->
+ {Is,Map} = drop_upto_label(Is0, Map0),
+ fix_phis_1(Is, none, Map);
+fix_phis_1([#cg_break{args=Args,phi=Target}|Is], Lbl, Map) when is_integer(Lbl) ->
+ Pairs1 = case Map of
+ #{Target:=Pairs0} -> Pairs0;
+ #{} -> []
+ end,
+ Pairs = [[{Arg,Lbl} || Arg <- Args]|Pairs1],
+ I = make_uncond_branch(Target),
+ [I|fix_phis_1(Is, none, Map#{Target=>Pairs})];
+fix_phis_1([#cg_phi{vars=Vars}|Is0], Lbl, Map0) ->
+ Pairs = maps:get(Lbl, Map0),
+ Map1 = maps:remove(Lbl, Map0),
+ case gen_phis(Vars, Pairs) of
+ [#b_set{op=phi,args=[]}] ->
+ {Is,Map} = drop_upto_label(Is0, Map1),
+ Ret = #b_ret{arg=#b_literal{val=unreachable}},
+ [Ret|fix_phis_1(Is, none, Map)];
+ Phis ->
+ Phis ++ fix_phis_1(Is0, Lbl, Map1)
+ end;
+fix_phis_1([I|Is], Lbl, Map) ->
+ [I|fix_phis_1(Is, Lbl, Map)];
+fix_phis_1([], _, Map) ->
+ [] = maps:to_list(Map), %Assertion.
+ [].
+
+gen_phis([V|Vs], Preds0) ->
+ {Pairs,Preds} = collect_preds(Preds0, [], []),
+ [#b_set{op=phi,dst=V,args=Pairs}|gen_phis(Vs, Preds)];
+gen_phis([], _) -> [].
+
+collect_preds([[First|Rest]|T], ColAcc, RestAcc) ->
+ collect_preds(T, [First|ColAcc], [Rest|RestAcc]);
+collect_preds([], ColAcc, RestAcc) ->
+ {keysort(2, ColAcc),RestAcc}.
+
+fix_sets([#b_set{dst=none}=Set|Is], Acc, St0) ->
+ {Dst,St} = new_ssa_var('@ssa_ignored', St0),
+ I = Set#b_set{dst=Dst},
+ fix_sets(Is, [I|Acc], St);
+fix_sets([I|Is], Acc, St) ->
+ fix_sets(Is, [I|Acc], St);
+fix_sets([], Acc, St) ->
+ {reverse(Acc),St}.
+
+build_map(Is) ->
+ Blocks = build_graph_1(Is, [], []),
+ maps:from_list(Blocks).
+
+build_graph_1([{label,L}|Is], Lbls, []) ->
+ build_graph_1(Is, [L|Lbls], []);
+build_graph_1([{label,L}|Is], Lbls, [_|_]=BlockAcc) ->
+ make_blocks(Lbls, BlockAcc) ++ build_graph_1(Is, [L], []);
+build_graph_1([I|Is], Lbls, BlockAcc) ->
+ build_graph_1(Is, Lbls, [I|BlockAcc]);
+build_graph_1([], Lbls, BlockAcc) ->
+ make_blocks(Lbls, BlockAcc).
+
+make_blocks(Lbls, [Last|Is0]) ->
+ Is = reverse(Is0),
+ Block = #b_blk{is=Is,last=Last},
+ [{L,Block} || L <- Lbls].
+
+drop_upto_label([{label,_}|_]=Is, Map) ->
+ {Is,Map};
+drop_upto_label([#cg_break{phi=Target}|Is], Map) ->
+ Pairs = case Map of
+ #{Target:=Pairs0} -> Pairs0;
+ #{} -> []
+ end,
+ drop_upto_label(Is, Map#{Target=>Pairs});
+drop_upto_label([_|Is], Map) ->
+ drop_upto_label(Is, Map).
+
+k_get_anno(Thing) -> element(2, Thing).
diff --git a/lib/compiler/src/beam_listing.erl b/lib/compiler/src/beam_listing.erl
index 518b958794..8a0ce5b50a 100644
--- a/lib/compiler/src/beam_listing.erl
+++ b/lib/compiler/src/beam_listing.erl
@@ -23,6 +23,7 @@
-include("core_parse.hrl").
-include("v3_kernel.hrl").
+-include("beam_ssa.hrl").
-include("beam_disasm.hrl").
-import(lists, [foreach/2]).
@@ -41,6 +42,12 @@ module(File, #k_mdef{}=Kern) ->
%% This is a kernel module.
io:put_chars(File, v3_kernel_pp:format(Kern));
%%io:put_chars(File, io_lib:format("~p~n", [Kern]));
+module(File, #b_module{name=Mod,exports=Exp,attributes=Attr,body=Fs}) ->
+ io:format(File, "module ~p.\n", [Mod]),
+ io:format(File, "exports ~p.\n", [Exp]),
+ io:format(File, "attributes ~p.\n\n", [Attr]),
+ PP = [beam_ssa_pp:format_function(F) || F <- Fs],
+ io:put_chars(File, lists:join($\n, PP));
module(Stream, {Mod,Exp,Attr,Code,NumLabels}) ->
%% This is output from v3_codegen.
io:format(Stream, "{module, ~p}. %% version = ~w\n",
diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl
index 2b8dd40e29..74da6aa704 100644
--- a/lib/compiler/src/beam_peep.erl
+++ b/lib/compiler/src/beam_peep.erl
@@ -101,23 +101,13 @@ peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) ->
I = {jump,F},
peep([I|Is], gb_sets:empty(), Acc0);
[{atom,_}=Value,Lbl] when Op =:= select_val ->
- %% Single value left. Convert to regular test and pop redundant tests.
+ %% Single value left. Convert to regular test.
Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
- case Acc0 of
- [{test,is_atom,F,[R]}|Acc] ->
- peep(Is1, SeenTests0, Acc);
- _ ->
- peep(Is1, SeenTests0, Acc0)
- end;
+ peep(Is1, SeenTests0, Acc0);
[{integer,_}=Value,Lbl] when Op =:= select_val ->
- %% Single value left. Convert to regular test and pop redundant tests.
+ %% Single value left. Convert to regular test.
Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
- case Acc0 of
- [{test,is_integer,F,[R]}|Acc] ->
- peep(Is1, SeenTests0, Acc);
- _ ->
- peep(Is1, SeenTests0, Acc0)
- end;
+ peep(Is1, SeenTests0, Acc0);
[Arity,Lbl] when Op =:= select_tuple_arity ->
%% Single value left. Convert to regular test
Is1 = [{test,test_arity,F,[R,Arity]},{jump,Lbl}|Is],
@@ -126,6 +116,21 @@ peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) ->
I = {select,Op,R,F,Vls},
peep(Is, gb_sets:empty(), [I|Acc0])
end;
+peep([{get_map_elements,Fail,Src,List}=I|Is], _SeenTests, Acc0) ->
+ SeenTests = gb_sets:empty(),
+ case simplify_get_map_elements(Fail, Src, List, Acc0) of
+ {ok,Acc} ->
+ peep(Is, SeenTests, Acc);
+ error ->
+ peep(Is, SeenTests, [I|Acc0])
+ end;
+peep([{test,has_map_fields,Fail,Ops}=I|Is], SeenTests, Acc0) ->
+ case simplify_has_map_fields(Fail, Ops, Acc0) of
+ {ok,Acc} ->
+ peep(Is, SeenTests, Acc);
+ error ->
+ peep(Is, SeenTests, [I|Acc0])
+ end;
peep([{test,Op,_,Ops}=I|Is], SeenTests0, Acc) ->
case beam_utils:is_pure_test(I) of
false ->
@@ -176,3 +181,39 @@ prune_redundant_values([_Val,F|Vls], F) ->
prune_redundant_values([Val,Lbl|Vls], F) ->
[Val,Lbl|prune_redundant_values(Vls, F)];
prune_redundant_values([], _) -> [].
+
+simplify_get_map_elements(Fail, Src, {list,[Key,Dst]},
+ [{get_map_elements,Fail,Src,{list,List1}}|Acc]) ->
+ case are_keys_literals([Key]) andalso are_keys_literals(List1) of
+ true ->
+ case member(Key, List1) of
+ true ->
+ %% The key is already in the other list. That is
+ %% very unusual, because there are optimizations to get
+ %% rid of duplicate keys. Therefore, don't try to
+ %% do anything smart here; just keep the
+ %% get_map_elements instructions separate.
+ error;
+ false ->
+ List = [Key,Dst|List1],
+ {ok,[{get_map_elements,Fail,Src,{list,List}}|Acc]}
+ end;
+ false ->
+ error
+ end;
+simplify_get_map_elements(_, _, _, _) -> error.
+
+simplify_has_map_fields(Fail, [Src|Keys0],
+ [{test,has_map_fields,Fail,[Src|Keys1]}|Acc]) ->
+ case are_keys_literals(Keys0) andalso are_keys_literals(Keys1) of
+ true ->
+ Keys = Keys0 ++ Keys1,
+ {ok,[{test,has_map_fields,Fail,[Src|Keys]}|Acc]};
+ false ->
+ error
+ end;
+simplify_has_map_fields(_, _, _) -> error.
+
+are_keys_literals([{x,_}|_]) -> false;
+are_keys_literals([{y,_}|_]) -> false;
+are_keys_literals([_|_]) -> true.
diff --git a/lib/compiler/src/beam_split.erl b/lib/compiler/src/beam_split.erl
index 809e49b3d0..7b18946ae0 100644
--- a/lib/compiler/src/beam_split.erl
+++ b/lib/compiler/src/beam_split.erl
@@ -44,10 +44,6 @@ split_blocks([I|Is], Acc) ->
split_blocks(Is, [I|Acc]);
split_blocks([], Acc) -> reverse(Acc).
-split_block([{set,[R],[_,_,_]=As,{bif,is_record,{f,Lbl}}}|Is], Bl, Acc) ->
- %% is_record/3 must be translated by beam_clean; therefore,
- %% it must be outside of any block.
- split_block(Is, [], [{bif,is_record,{f,Lbl},As,R}|make_block(Bl, Acc)]);
split_block([{set,[R],As,{bif,N,{f,Lbl}=Fail}}|Is], Bl, Acc) when Lbl =/= 0 ->
split_block(Is, [], [{bif,N,Fail,As,R}|make_block(Bl, Acc)]);
split_block([{set,[],[],{line,_}=Line},
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
new file mode 100644
index 0000000000..a2766a0501
--- /dev/null
+++ b/lib/compiler/src/beam_ssa.erl
@@ -0,0 +1,616 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose: Type definitions and utilities for the SSA format.
+
+-module(beam_ssa).
+-export([add_anno/3,get_anno/2,
+ clobbers_xregs/1,def/2,def_used/2,dominators/1,
+ flatmapfold_instrs_rpo/4,
+ fold_po/3,fold_po/4,fold_rpo/3,fold_rpo/4,
+ fold_instrs_rpo/4,
+ linearize/1,
+ mapfold_instrs_rpo/4,
+ predecessors/1,
+ rename_vars/3,
+ rpo/1,rpo/2,
+ split_blocks/3,
+ successors/1,successors/2,
+ update_phi_labels/4,used/1]).
+
+-export_type([b_module/0,b_function/0,b_blk/0,b_set/0,
+ b_ret/0,b_br/0,b_switch/0,terminator/0,
+ b_var/0,b_literal/0,b_remote/0,b_local/0,
+ value/0,argument/0,label/0,
+ var_name/0,var_base/0,literal_value/0,
+ op/0,anno/0,block_map/0]).
+
+-include("beam_ssa.hrl").
+
+-type b_module() :: #b_module{}.
+-type b_function() :: #b_function{}.
+-type b_blk() :: #b_blk{}.
+-type b_set() :: #b_set{}.
+
+-type b_br() :: #b_br{}.
+-type b_ret() :: #b_ret{}.
+-type b_switch() :: #b_switch{}.
+-type terminator() :: b_br() | b_ret() | b_switch().
+
+-type b_var() :: #b_var{}.
+-type b_literal() :: #b_literal{}.
+-type b_remote() :: #b_remote{}.
+-type b_local() :: #b_local{}.
+
+-type value() :: b_var() | b_literal().
+-type phi_value() :: {value(),label()}.
+-type argument() :: value() | b_remote() | b_local() | phi_value().
+-type label() :: non_neg_integer().
+
+-type var_name() :: var_base() | {var_base(),non_neg_integer()}.
+-type var_base() :: atom() | non_neg_integer().
+
+-type literal_value() :: atom() | integer() | float() | list() |
+ nil() | tuple() | map() | binary().
+
+-type op() :: {'bif',atom()} | {'float',float_op()} | prim_op() | cg_prim_op().
+-type anno() :: #{atom() := any()}.
+
+-type block_map() :: #{label():=b_blk()}.
+
+%% Note: By default, dialyzer will collapse this type to atom().
+%% To avoid the collapsing, change the value of SET_LIMIT to 50 in the
+%% file erl_types.erl in the hipe application.
+
+-type prim_op() :: 'bs_add' | 'bs_extract' | 'bs_init' | 'bs_init_writable' |
+ 'bs_match' | 'bs_put' | 'bs_start_match' | 'bs_test_tail' |
+ 'bs_utf16_size' | 'bs_utf8_size' | 'build_stacktrace' |
+ 'call' | 'catch_end' | 'context_to_binary' |
+ 'extract' |
+ 'get_hd' | 'get_map_element' | 'get_tl' | 'get_tuple_element' |
+ 'has_map_field' |
+ 'is_nonempty_list' | 'is_tagged_tuple' |
+ 'kill_try_tag' |
+ 'landingpad' |
+ 'make_fun' | 'new_try_tag' |
+ 'peek_message' | 'phi' | 'put_list' | 'put_map' | 'put_tuple' |
+ 'raw_raise' | 'recv_next' | 'remove_message' | 'resume' |
+ 'set_tuple_element' | 'succeeded' |
+ 'timeout' |
+ 'wait' | 'wait_timeout'.
+
+-type float_op() :: 'checkerror' | 'clearerror' | 'convert' | 'get' | 'put' |
+ '+' | '-' | '*' | '/'.
+
+%% Primops only used internally during code generation.
+-type cg_prim_op() :: 'bs_get' | 'bs_match_string' | 'bs_restore' | 'bs_skip' |
+'copy' | 'put_tuple_arity' | 'put_tuple_element'.
+
+-import(lists, [foldl/3,mapfoldl/3,reverse/1]).
+
+-spec add_anno(Key, Value, Construct) -> Construct when
+ Key :: atom(),
+ Value :: any(),
+ Construct :: b_function() | b_blk() | b_set() | terminator().
+
+add_anno(Key, Val, #b_function{anno=Anno}=Bl) ->
+ Bl#b_function{anno=Anno#{Key=>Val}};
+add_anno(Key, Val, #b_blk{anno=Anno}=Bl) ->
+ Bl#b_blk{anno=Anno#{Key=>Val}};
+add_anno(Key, Val, #b_set{anno=Anno}=Bl) ->
+ Bl#b_set{anno=Anno#{Key=>Val}};
+add_anno(Key, Val, #b_br{anno=Anno}=Bl) ->
+ Bl#b_br{anno=Anno#{Key=>Val}};
+add_anno(Key, Val, #b_ret{anno=Anno}=Bl) ->
+ Bl#b_ret{anno=Anno#{Key=>Val}};
+add_anno(Key, Val, #b_switch{anno=Anno}=Bl) ->
+ Bl#b_switch{anno=Anno#{Key=>Val}}.
+
+-spec get_anno(atom(), b_blk()|b_set()|terminator()) -> any().
+
+get_anno(Key, Construct) ->
+ maps:get(Key, get_anno(Construct)).
+
+get_anno(#b_blk{anno=Anno}) -> Anno;
+get_anno(#b_set{anno=Anno}) -> Anno;
+get_anno(#b_br{anno=Anno}) -> Anno;
+get_anno(#b_ret{anno=Anno}) -> Anno;
+get_anno(#b_switch{anno=Anno}) -> Anno.
+
+%% clobbers_xregs(#b_set{}) -> true|false.
+%% Test whether the instruction invalidates all X registers.
+
+-spec clobbers_xregs(b_set()) -> boolean().
+
+clobbers_xregs(#b_set{op=Op}) ->
+ case Op of
+ bs_init_writable -> true;
+ build_stacktrace -> true;
+ call -> true;
+ landingpad -> true;
+ make_fun -> true;
+ peek_message -> true;
+ raw_raise -> true;
+ _ -> false
+ end.
+
+-spec predecessors(Blocks) -> #{BlockNumber:=[Predecessor]} when
+ Blocks :: block_map(),
+ BlockNumber :: label(),
+ Predecessor :: label().
+
+predecessors(Blocks) ->
+ P0 = [{S,L} || {L,Blk} <- maps:to_list(Blocks),
+ S <- successors(Blk)],
+ P1 = sofs:relation(P0),
+ P2 = sofs:rel2fam(P1),
+ P3 = sofs:to_external(P2),
+ P = [{0,[]}|P3],
+ maps:from_list(P).
+
+-spec successors(b_blk()) -> [label()].
+
+successors(#b_blk{last=Terminator}) ->
+ case Terminator of
+ #b_br{bool=#b_literal{val=true},succ=Succ} ->
+ [Succ];
+ #b_br{bool=#b_literal{val=false},fail=Fail} ->
+ [Fail];
+ #b_br{succ=Succ,fail=Fail} ->
+ [Fail,Succ];
+ #b_switch{fail=Fail,list=List} ->
+ [Fail|[L || {_,L} <- List]];
+ #b_ret{} ->
+ []
+ end.
+
+-spec successors(label(), block_map()) -> [label()].
+
+successors(L, Blocks) ->
+ successors(maps:get(L, Blocks)).
+
+-spec def(Ls, Blocks) -> Def when
+ Ls :: [label()],
+ Blocks :: block_map(),
+ Def :: ordsets:ordset(var_name()).
+
+def(Ls, Blocks) ->
+ Top = rpo(Ls, Blocks),
+ Blks = [maps:get(L, Blocks) || L <- Top],
+ def_1(Blks, []).
+
+-spec def_used(Ls, Blocks) -> {Def,Used} when
+ Ls :: [label()],
+ Blocks :: block_map(),
+ Def :: ordsets:ordset(var_name()),
+ Used :: ordsets:ordset(var_name()).
+
+def_used(Ls, Blocks) ->
+ Top = rpo(Ls, Blocks),
+ Blks = [maps:get(L, Blocks) || L <- Top],
+ Preds = gb_sets:from_list(Top),
+ def_used_1(Blks, Preds, [], gb_sets:empty()).
+
+-spec dominators(Blocks) -> Result when
+ Blocks :: block_map(),
+ Result :: #{label():=ordsets:ordset(label())}.
+
+dominators(Blocks) ->
+ Preds = predecessors(Blocks),
+ Top0 = rpo(Blocks),
+ Top = [{L,maps:get(L, Preds)} || L <- Top0],
+
+ %% The flow graph for an Erlang function is reducible, and
+ %% therefore one traversal in reverse postorder is sufficient.
+ iter_dominators(Top, #{}).
+
+-spec fold_instrs_rpo(Fun, From, Acc0, Blocks) -> any() when
+ Fun :: fun((b_blk()|terminator(), any()) -> any()),
+ From :: [label()],
+ Acc0 :: any(),
+ Blocks :: block_map().
+
+fold_instrs_rpo(Fun, From, Acc0, Blocks) ->
+ Top = rpo(From, Blocks),
+ fold_instrs_rpo_1(Top, Fun, Blocks, Acc0).
+
+-spec mapfold_instrs_rpo(Fun, From, Acc0, Blocks0) -> {Blocks,Acc} when
+ Fun :: fun((b_blk()|terminator(), any()) -> any()),
+ From :: [label()],
+ Acc0 :: any(),
+ Acc :: any(),
+ Blocks0 :: block_map(),
+ Blocks :: block_map().
+
+mapfold_instrs_rpo(Fun, From, Acc0, Blocks) ->
+ Top = rpo(From, Blocks),
+ mapfold_instrs_rpo_1(Top, Fun, Blocks, Acc0).
+
+-spec flatmapfold_instrs_rpo(Fun, From, Acc0, Blocks0) -> {Blocks,Acc} when
+ Fun :: fun((b_blk()|terminator(), any()) -> any()),
+ From :: [label()],
+ Acc0 :: any(),
+ Acc :: any(),
+ Blocks0 :: block_map(),
+ Blocks :: block_map().
+
+flatmapfold_instrs_rpo(Fun, From, Acc0, Blocks) ->
+ Top = rpo(From, Blocks),
+ flatmapfold_instrs_rpo_1(Top, Fun, Blocks, Acc0).
+
+-type fold_fun() :: fun((label(), b_blk(), any()) -> any()).
+
+%% fold_rpo(Fun, [Label], Acc0, Blocks) -> Acc.
+%% Fold over all blocks a reverse postorder traversal of the block
+%% graph; that is, first visit a block, then visit its successors.
+
+-spec fold_rpo(Fun, Acc0, Blocks) -> any() when
+ Fun :: fold_fun(),
+ Acc0 :: any(),
+ Blocks :: #{label():=b_blk()}.
+
+fold_rpo(Fun, Acc0, Blocks) ->
+ fold_rpo(Fun, [0], Acc0, Blocks).
+
+%% fold_rpo(Fun, [Label], Acc0, Blocks) -> Acc. Fold over all blocks
+%% reachable from a given set of labels in a reverse postorder
+%% traversal of the block graph; that is, first visit a block, then
+%% visit its successors.
+
+-spec fold_rpo(Fun, Labels, Acc0, Blocks) -> any() when
+ Fun :: fold_fun(),
+ Labels :: [label()],
+ Acc0 :: any(),
+ Blocks :: #{label():=b_blk()}.
+
+fold_rpo(Fun, From, Acc0, Blocks) ->
+ Top = rpo(From, Blocks),
+ fold_rpo_1(Top, Fun, Blocks, Acc0).
+
+%% fold_po(Fun, Acc0, Blocks) -> Acc.
+%% Fold over all blocks in a postorder traversal of the block graph;
+%% that is, first visit all successors of block, then the block
+%% itself.
+
+-spec fold_po(Fun, Acc0, Blocks) -> any() when
+ Fun :: fold_fun(),
+ Acc0 :: any(),
+ Blocks :: #{label():=b_blk()}.
+
+%% fold_po(Fun, From, Acc0, Blocks) -> Acc.
+%% Fold over the blocks reachable from the block numbers given
+%% by From in a postorder traversal of the block graph.
+
+fold_po(Fun, Acc0, Blocks) ->
+ fold_po(Fun, [0], Acc0, Blocks).
+
+-spec fold_po(Fun, Labels, Acc0, Blocks) -> any() when
+ Fun :: fold_fun(),
+ Labels :: [label()],
+ Acc0 :: any(),
+ Blocks :: block_map().
+
+fold_po(Fun, From, Acc0, Blocks) ->
+ Top = reverse(rpo(From, Blocks)),
+ fold_rpo_1(Top, Fun, Blocks, Acc0).
+
+%% linearize(Blocks) -> [{BlockLabel,#b_blk{}}].
+%% Linearize the intermediate representation of the code.
+
+-spec linearize(Blocks) -> Linear when
+ Blocks :: block_map(),
+ Linear :: [{label(),b_blk()}].
+
+linearize(Blocks) ->
+ Seen = gb_sets:empty(),
+ {Linear,_} = linearize_1([0], Blocks, Seen, []),
+ Linear.
+
+-spec rpo(Blocks) -> [Label] when
+ Blocks :: block_map(),
+ Label :: label().
+
+rpo(Blocks) ->
+ rpo([0], Blocks).
+
+-spec rpo(From, Blocks) -> Labels when
+ From :: [label()],
+ Blocks :: block_map(),
+ Labels :: [label()].
+
+rpo(From, Blocks) ->
+ Seen = gb_sets:empty(),
+ {Ls,_} = rpo_1(From, Blocks, Seen, []),
+ Ls.
+
+-spec rename_vars(Rename, [label()], block_map()) -> block_map() when
+ Rename :: [{var_name(),value()}] | #{var_name():=value()}.
+
+rename_vars(Rename, From, Blocks) when is_list(Rename) ->
+ rename_vars(maps:from_list(Rename), From, Blocks);
+rename_vars(Rename, From, Blocks) when is_map(Rename)->
+ Top = rpo(From, Blocks),
+ Preds = gb_sets:from_list(Top),
+ F = fun(#b_set{op=phi,args=Args0}=Set) ->
+ Args = rename_phi_vars(Args0, Preds, Rename),
+ Set#b_set{args=Args};
+ (#b_set{args=Args0}=Set) ->
+ Args = [rename_var(A, Rename) || A <- Args0],
+ Set#b_set{args=Args};
+ (#b_switch{arg=Bool}=Sw) ->
+ Sw#b_switch{arg=rename_var(Bool, Rename)};
+ (#b_br{bool=Bool}=Br) ->
+ Br#b_br{bool=rename_var(Bool, Rename)};
+ (#b_ret{arg=Arg}=Ret) ->
+ Ret#b_ret{arg=rename_var(Arg, Rename)}
+ end,
+ map_instrs_1(Top, F, Blocks).
+
+%% split_blocks(Predicate, Blocks0, Count0) -> {Blocks,Count}.
+%% Call Predicate(Instruction) for each instruction in all
+%% blocks. If Predicate/1 returns true, split the block
+%% before this instruction.
+
+-spec split_blocks(Pred, Blocks0, Count0) -> {Blocks,Count} when
+ Pred :: fun((b_set()) -> boolean()),
+ Blocks :: block_map(),
+ Count0 :: beam_ssa:label(),
+ Blocks0 :: block_map(),
+ Blocks :: block_map(),
+ Count :: beam_ssa:label().
+
+split_blocks(P, Blocks, Count) ->
+ Ls = beam_ssa:rpo(Blocks),
+ split_blocks_1(Ls, P, Blocks, Count).
+
+%% update_phi_labels([BlockLabel], Old, New, Blocks0) -> Blocks.
+%% In the given blocks, replace label Old in with New in all
+%% phi nodes. This is useful after merging or splitting
+%% blocks.
+
+-spec update_phi_labels(From, Old, New, Blocks0) -> Blocks when
+ From :: [label()],
+ Old :: label(),
+ New :: label(),
+ Blocks0 :: block_map(),
+ Blocks :: block_map().
+
+update_phi_labels([L|Ls], Old, New, Blocks0) ->
+ case Blocks0 of
+ #{L:=#b_blk{is=[#b_set{op=phi}|_]=Is0}=Blk0} ->
+ Is = update_phi_labels_is(Is0, Old, New),
+ Blk = Blk0#b_blk{is=Is},
+ Blocks = Blocks0#{L:=Blk},
+ update_phi_labels(Ls, Old, New, Blocks);
+ #{L:=#b_blk{}} ->
+ %% No phi nodes in this block.
+ update_phi_labels(Ls, Old, New, Blocks0)
+ end;
+update_phi_labels([], _, _, Blocks) -> Blocks.
+
+-spec used(b_blk() | b_set() | terminator()) -> [var_name()].
+
+used(#b_blk{is=Is,last=Last}) ->
+ used_1([Last|Is], ordsets:new());
+used(#b_br{bool=#b_var{name=V}}) ->
+ [V];
+used(#b_ret{arg=#b_var{name=V}}) ->
+ [V];
+used(#b_set{op=phi,args=Args}) ->
+ ordsets:from_list([V || {#b_var{name=V},_} <- Args]);
+used(#b_set{args=Args}) ->
+ ordsets:from_list(used_args(Args));
+used(#b_switch{arg=#b_var{name=V}}) ->
+ [V];
+used(_) -> [].
+
+%%%
+%%% Internal functions.
+%%%
+
+def_used_1([#b_blk{is=Is,last=Last}|Bs], Preds, Def0, Used0) ->
+ {Def,Used1} = def_used_is(Is, Preds, Def0, Used0),
+ Used = gb_sets:union(gb_sets:from_list(used(Last)), Used1),
+ def_used_1(Bs, Preds, Def, Used);
+def_used_1([], _Preds, Def, Used) ->
+ {ordsets:from_list(Def),gb_sets:to_list(Used)}.
+
+def_used_is([#b_set{op=phi,dst=#b_var{name=Dst},args=Args}|Is],
+ Preds, Def0, Used0) ->
+ Def = [Dst|Def0],
+ %% We must be careful to only include variables that will
+ %% be used when arriving from one of the predecessor blocks
+ %% in Preds.
+ Used1 = [V || {#b_var{name=V},L} <- Args,
+ gb_sets:is_member(L, Preds)],
+ Used = gb_sets:union(gb_sets:from_list(Used1), Used0),
+ def_used_is(Is, Preds, Def, Used);
+def_used_is([#b_set{dst=#b_var{name=Dst}}=I|Is], Preds, Def0, Used0) ->
+ Def = [Dst|Def0],
+ Used = gb_sets:union(gb_sets:from_list(used(I)), Used0),
+ def_used_is(Is, Preds, Def, Used);
+def_used_is([], _Preds, Def, Used) ->
+ {Def,Used}.
+
+def_1([#b_blk{is=Is}|Bs], Def0) ->
+ Def = def_is(Is, Def0),
+ def_1(Bs, Def);
+def_1([], Def) ->
+ ordsets:from_list(Def).
+
+def_is([#b_set{dst=#b_var{name=Dst}}|Is], Def) ->
+ def_is(Is, [Dst|Def]);
+def_is([], Def) -> Def.
+
+iter_dominators([{0,[]}|Ls], _Doms) ->
+ Dom = [0],
+ iter_dominators(Ls, #{0=>Dom});
+iter_dominators([{L,Preds}|Ls], Doms) ->
+ DomPreds = [maps:get(P, Doms) || P <- Preds, maps:is_key(P, Doms)],
+ Dom = ordsets:add_element(L, ordsets:intersection(DomPreds)),
+ iter_dominators(Ls, Doms#{L=>Dom});
+iter_dominators([], Doms) -> Doms.
+
+fold_rpo_1([L|Ls], Fun, Blocks, Acc0) ->
+ Block = maps:get(L, Blocks),
+ Acc = Fun(L, Block, Acc0),
+ fold_rpo_1(Ls, Fun, Blocks, Acc);
+fold_rpo_1([], _, _, Acc) -> Acc.
+
+fold_instrs_rpo_1([L|Ls], Fun, Blocks, Acc0) ->
+ #b_blk{is=Is,last=Last} = maps:get(L, Blocks),
+ Acc1 = foldl(Fun, Acc0, Is),
+ Acc = Fun(Last, Acc1),
+ fold_instrs_rpo_1(Ls, Fun, Blocks, Acc);
+fold_instrs_rpo_1([], _, _, Acc) -> Acc.
+
+mapfold_instrs_rpo_1([L|Ls], Fun, Blocks0, Acc0) ->
+ #b_blk{is=Is0,last=Last0} = Block0 = maps:get(L, Blocks0),
+ {Is,Acc1} = mapfoldl(Fun, Acc0, Is0),
+ {Last,Acc} = Fun(Last0, Acc1),
+ Block = Block0#b_blk{is=Is,last=Last},
+ Blocks = maps:put(L, Block, Blocks0),
+ mapfold_instrs_rpo_1(Ls, Fun, Blocks, Acc);
+mapfold_instrs_rpo_1([], _, Blocks, Acc) ->
+ {Blocks,Acc}.
+
+flatmapfold_instrs_rpo_1([L|Ls], Fun, Blocks0, Acc0) ->
+ #b_blk{is=Is0,last=Last0} = Block0 = maps:get(L, Blocks0),
+ {Is,Acc1} = flatmapfoldl(Fun, Acc0, Is0),
+ {[Last],Acc} = Fun(Last0, Acc1),
+ Block = Block0#b_blk{is=Is,last=Last},
+ Blocks = maps:put(L, Block, Blocks0),
+ flatmapfold_instrs_rpo_1(Ls, Fun, Blocks, Acc);
+flatmapfold_instrs_rpo_1([], _, Blocks, Acc) ->
+ {Blocks,Acc}.
+
+linearize_1([L|Ls], Blocks, Seen0, Acc0) ->
+ case gb_sets:is_member(L, Seen0) of
+ true ->
+ linearize_1(Ls, Blocks, Seen0, Acc0);
+ false ->
+ Seen1 = gb_sets:insert(L, Seen0),
+ Block = maps:get(L, Blocks),
+ Successors = successors(Block),
+ {Acc,Seen} = linearize_1(Successors, Blocks, Seen1, Acc0),
+ linearize_1(Ls, Blocks, Seen, [{L,Block}|Acc])
+ end;
+linearize_1([], _, Seen, Acc) ->
+ {Acc,Seen}.
+
+rpo_1([L|Ls], Blocks, Seen0, Acc0) ->
+ case gb_sets:is_member(L, Seen0) of
+ true ->
+ rpo_1(Ls, Blocks, Seen0, Acc0);
+ false ->
+ Block = maps:get(L, Blocks),
+ Seen1 = gb_sets:insert(L, Seen0),
+ Successors = successors(Block),
+ {Acc,Seen} = rpo_1(Successors, Blocks, Seen1, Acc0),
+ rpo_1(Ls, Blocks, Seen, [L|Acc])
+ end;
+rpo_1([], _, Seen, Acc) ->
+ {Acc,Seen}.
+
+rename_var(#b_var{name=Old}=V, Rename) ->
+ case Rename of
+ #{Old:=New} -> New;
+ #{} -> V
+ end;
+rename_var(#b_remote{mod=Mod0,name=Name0}=Remote, Rename) ->
+ Mod = rename_var(Mod0, Rename),
+ Name = rename_var(Name0, Rename),
+ Remote#b_remote{mod=Mod,name=Name};
+rename_var(Old, _) -> Old.
+
+rename_phi_vars([{Var,L}|As], Preds, Ren) ->
+ case gb_sets:is_member(L, Preds) of
+ true ->
+ [{rename_var(Var, Ren),L}|rename_phi_vars(As, Preds, Ren)];
+ false ->
+ [{Var,L}|rename_phi_vars(As, Preds, Ren)]
+ end;
+rename_phi_vars([], _, _) -> [].
+
+map_instrs_1([L|Ls], Fun, Blocks0) ->
+ #b_blk{is=Is0,last=Last0} = Blk0 = maps:get(L, Blocks0),
+ Is = [Fun(I) || I <- Is0],
+ Last = Fun(Last0),
+ Blk = Blk0#b_blk{is=Is,last=Last},
+ Blocks = maps:put(L, Blk, Blocks0),
+ map_instrs_1(Ls, Fun, Blocks);
+map_instrs_1([], _, Blocks) -> Blocks.
+
+flatmapfoldl(F, Accu0, [Hd|Tail]) ->
+ {R,Accu1} = F(Hd, Accu0),
+ {Rs,Accu2} = flatmapfoldl(F, Accu1, Tail),
+ {R++Rs,Accu2};
+flatmapfoldl(_, Accu, []) -> {[],Accu}.
+
+split_blocks_1([L|Ls], P, Blocks0, Count0) ->
+ #b_blk{is=Is0} = Blk = maps:get(L, Blocks0),
+ case split_blocks_is(Is0, P, []) of
+ {yes,Bef,Aft} ->
+ NewLbl = Count0,
+ Count = Count0 + 1,
+ Br = #b_br{bool=#b_literal{val=true},succ=NewLbl,fail=NewLbl},
+ BefBlk = Blk#b_blk{is=Bef,last=Br},
+ NewBlk = Blk#b_blk{is=Aft},
+ Blocks1 = Blocks0#{L:=BefBlk,NewLbl=>NewBlk},
+ Successors = beam_ssa:successors(NewBlk),
+ Blocks = beam_ssa:update_phi_labels(Successors, L, NewLbl, Blocks1),
+ split_blocks_1([NewLbl|Ls], P, Blocks, Count);
+ no ->
+ split_blocks_1(Ls, P, Blocks0, Count0)
+ end;
+split_blocks_1([], _, Blocks, Count) ->
+ {Blocks,Count}.
+
+split_blocks_is([I|Is], P, []) ->
+ split_blocks_is(Is, P, [I]);
+split_blocks_is([I|Is], P, Acc) ->
+ case P(I) of
+ true ->
+ {yes,reverse(Acc),[I|Is]};
+ false ->
+ split_blocks_is(Is, P, [I|Acc])
+ end;
+split_blocks_is([], _, _) -> no.
+
+update_phi_labels_is([#b_set{op=phi,args=Args0}=I0|Is], Old, New) ->
+ Args = [{Arg,rename_label(Lbl, Old, New)} || {Arg,Lbl} <- Args0],
+ I = I0#b_set{args=Args},
+ [I|update_phi_labels_is(Is, Old, New)];
+update_phi_labels_is(Is, _, _) -> Is.
+
+rename_label(Old, Old, New) -> New;
+rename_label(Lbl, _Old, _New) -> Lbl.
+
+used_args([#b_var{name=V}|As]) ->
+ [V|used_args(As)];
+used_args([#b_remote{mod=Mod,name=Name}|As]) ->
+ used_args([Mod,Name|As]);
+used_args([_|As]) ->
+ used_args(As);
+used_args([]) -> [].
+
+used_1([H|T], Used0) ->
+ Used = ordsets:union(used(H), Used0),
+ used_1(T, Used);
+used_1([], Used) -> Used.
diff --git a/lib/compiler/src/beam_ssa.hrl b/lib/compiler/src/beam_ssa.hrl
new file mode 100644
index 0000000000..fa76b08453
--- /dev/null
+++ b/lib/compiler/src/beam_ssa.hrl
@@ -0,0 +1,66 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-record(b_module, {anno=#{} :: beam_ssa:anno(),
+ name :: module(),
+ exports :: [{atom(),arity()}],
+ attributes :: list(),
+ body :: [beam_ssa:b_function()]}).
+-record(b_function, {anno=#{} :: beam_ssa:anno(),
+ args :: [beam_ssa:b_var()],
+ bs :: #{beam_ssa:label():=beam_ssa:b_blk()},
+ cnt :: beam_ssa:label()}).
+
+-record(b_blk, {anno=#{} :: beam_ssa:anno(),
+ is :: [beam_ssa:b_set()],
+ last :: beam_ssa:terminator()}).
+-record(b_set, {anno=#{} :: beam_ssa:anno(),
+ dst=none :: 'none'|beam_ssa:b_var(),
+ op :: beam_ssa:op(),
+ args=[] :: [beam_ssa:argument()]}).
+
+%% Terminators.
+-record(b_ret, {anno=#{} :: beam_ssa:anno(),
+ arg :: beam_ssa:value()}).
+
+-record(b_br, {anno=#{},
+ bool :: beam_ssa:value(),
+ succ :: beam_ssa:label(),
+ fail :: beam_ssa:label()}).
+
+-record(b_switch, {anno=#{} :: beam_ssa:anno(),
+ arg :: beam_ssa:value(),
+ fail :: beam_ssa:label(),
+ list :: [{beam_ssa:b_literal(),beam_ssa:label()}]}).
+
+%% Values.
+-record(b_var, {name :: beam_ssa:var_name()}).
+
+-record(b_literal, {val :: beam_ssa:literal_value()}).
+
+-record(b_remote, {mod :: beam_ssa:value(),
+ name :: beam_ssa:value(),
+ arity :: non_neg_integer()}).
+
+-record(b_local, {name :: beam_ssa:b_literal(),
+ arity :: non_neg_integer()}).
+
+%% If this block exists, it calls erlang:error(badarg).
+-define(BADARG_BLOCK, 1).
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl
new file mode 100644
index 0000000000..357352269c
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_codegen.erl
@@ -0,0 +1,1827 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose: Generate BEAM assembly code from the SSA format.
+
+-module(beam_ssa_codegen).
+
+-export([module/2]).
+-export([classify_heap_need/2]). %Called from beam_ssa_pre_codegen.
+
+-export_type([ssa_register/0]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [foldl/3,keymember/3,keysort/2,last/1,map/2,mapfoldl/3,
+ reverse/1,reverse/2,sort/1,splitwith/2,takewhile/2]).
+
+-record(cg, {lcount=1 :: beam_label(), %Label counter
+ functable=#{} :: #{fa()=>beam_label()},
+ labels=#{} :: #{ssa_label()=>0|beam_label()},
+ used_labels=gb_sets:empty() :: gb_sets:set(ssa_label()),
+ regs=#{} :: #{beam_ssa:var_name()=>ssa_register()},
+ ultimate_fail=1 :: beam_label(),
+ catches=gb_sets:empty() :: gb_sets:set(ssa_label())
+ }).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+ {'ok',beam_asm:module_code()}.
+
+module(#b_module{name=Mod,exports=Es,attributes=Attrs,body=Fs}, _Opts) ->
+ {Asm,St} = functions(Fs, {atom,Mod}),
+ {ok,{Mod,Es,Attrs,Asm,St#cg.lcount}}.
+
+-record(need, {h=0 :: non_neg_integer(),
+ f=0 :: non_neg_integer()}).
+
+-record(cg_blk, {anno=#{} :: anno(),
+ is=[] :: [instruction()],
+ last :: terminator()}).
+
+-record(cg_set, {anno=#{} :: anno(),
+ dst :: b_var(),
+ op :: beam_ssa:op(),
+ args :: [beam_ssa:argument() | xreg()]}).
+
+-record(cg_alloc, {anno=#{} :: anno(),
+ stack=none :: 'none' | pos_integer(),
+ words=#need{} :: #need{},
+ live :: 'undefined' | pos_integer(),
+ def_yregs=[] :: [yreg()]
+ }).
+
+-record(cg_br, {bool :: beam_ssa:value(),
+ succ :: ssa_label(),
+ fail :: ssa_label()
+ }).
+-record(cg_ret, {arg :: cg_value(),
+ dealloc=none :: 'none' | pos_integer()
+ }).
+-record(cg_switch, {arg :: cg_value(),
+ fail :: ssa_label(),
+ list :: [sw_list_item()]
+ }).
+
+-type fa() :: {beam_asm:function_name(),arity()}.
+-type ssa_label() :: beam_ssa:label().
+-type beam_label() :: beam_asm:label().
+
+-type anno() :: beam_ssa:anno().
+
+-type b_var() :: beam_ssa:b_var().
+-type b_literal() :: beam_ssa:b_literal().
+
+-type cg_value() :: beam_ssa:value() | xreg().
+
+-type cg_set() :: #cg_set{}.
+-type cg_alloc() :: #cg_alloc{}.
+
+-type instruction() :: cg_set() | cg_alloc().
+
+-type cg_br() :: #cg_br{}.
+-type cg_ret() :: #cg_ret{}.
+-type cg_switch() :: #cg_switch{}.
+-type terminator() :: cg_br() | cg_ret() | cg_switch().
+
+-type sw_list_item() :: {b_literal(),ssa_label()}.
+
+-type reg_num() :: beam_asm:reg_num().
+-type xreg() :: {'x',reg_num()}.
+-type yreg() :: {'y',reg_num()}.
+
+-type ssa_register() :: xreg() | yreg() | {'fr',reg_num()} | {'z',reg_num()}.
+
+functions(Forms, AtomMod) ->
+ mapfoldl(fun (F, St) -> function(F, AtomMod, St) end, #cg{lcount=1}, Forms).
+
+function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) ->
+ #{func_info:={_,Name,Arity}} = Anno,
+ try
+ assert_badarg_block(Blocks), %Assertion.
+ Regs = maps:get(registers, Anno),
+ St1 = St0#cg{labels=#{},used_labels=gb_sets:empty(),
+ regs=Regs},
+ {Fi,St2} = new_label(St1), %FuncInfo label
+ {Entry,St3} = local_func_label(Name, Arity, St2),
+ {Ult,St4} = new_label(St3), %Ultimate failure
+ Labels = (St4#cg.labels)#{0=>Entry,?BADARG_BLOCK=>0},
+ St5 = St4#cg{labels=Labels,used_labels=gb_sets:singleton(Entry),
+ ultimate_fail=Ult},
+ {Body,St} = cg_fun(Blocks, St5),
+ Asm = [{label,Fi},line(Anno),
+ {func_info,AtomMod,{atom,Name},Arity}] ++ Body ++
+ [{label,Ult},if_end],
+ Func = {function,Name,Arity,Entry,Asm},
+ {Func,St}
+ catch
+ Class:Error:Stack ->
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
+
+assert_badarg_block(Blocks) ->
+ %% Assertion: ?BADARG_BLOCK must be the call erlang:error(badarg).
+ case Blocks of
+ #{?BADARG_BLOCK:=Blk} ->
+ #b_blk{is=[#b_set{op=call,dst=Ret,
+ args=[#b_remote{mod=#b_literal{val=erlang},
+ name=#b_literal{val=error}},
+ #b_literal{val=badarg}]}],
+ last=#b_ret{arg=Ret}} = Blk,
+ ok;
+ #{} ->
+ %% ?BADARG_BLOCK has been removed because it was never used.
+ ok
+ end.
+
+cg_fun(Blocks, St0) ->
+ Linear0 = linearize(Blocks),
+ St = collect_catch_labels(Linear0, St0),
+ Linear1 = need_heap(Linear0),
+ Linear2 = prefer_xregs(Linear1, St),
+ Linear3 = liveness(Linear2, St),
+ Linear4 = defined(Linear3, St),
+ Linear = opt_allocate(Linear4, St),
+ cg_linear(Linear, St).
+
+%% collect_catch_labels(Linear, St) -> St.
+%% Collect all catch labels (labels for blocks that begin
+%% with 'landingpad' instructions) for later use.
+
+collect_catch_labels(Linear, St) ->
+ Labels = collect_catch_labels_1(Linear),
+ St#cg{catches=gb_sets:from_list(Labels)}.
+
+collect_catch_labels_1([{L,#cg_blk{is=[#cg_set{op=landingpad}|_]}}|Bs]) ->
+ [L|collect_catch_labels_1(Bs)];
+collect_catch_labels_1([_|Bs]) ->
+ collect_catch_labels_1(Bs);
+collect_catch_labels_1([]) -> [].
+
+%% need_heap([{BlockLabel,Block]) -> [{BlockLabel,Block}].
+%% Insert need_heap instructions in the instruction list. Try to be smart and
+%% collect them together as much as possible.
+
+need_heap(Bs0) ->
+ Bs1 = need_heap_allocs(Bs0, #{}),
+ {Bs,#need{h=0,f=0}} = need_heap_blks(reverse(Bs1), #need{}, []),
+ Bs.
+
+need_heap_allocs([{L,#cg_blk{is=Is0,last=Terminator}=Blk0}|Bs], Counts0) ->
+ Next = next_block(Bs),
+ Successors = successors(Terminator),
+ Counts = foldl(fun(S, Cnts) ->
+ case Cnts of
+ #{S:=C} -> Cnts#{S:=C+1};
+ #{} when S =:= Next -> Cnts#{S=>1};
+ #{} -> Cnts#{S=>42}
+ end
+ end, Counts0, Successors),
+ case Counts of
+ #{L:=1} ->
+ [{L,Blk0}|need_heap_allocs(Bs, Counts)];
+ #{L:=_} ->
+ %% This block has multiple predecessors. Force an allocation
+ %% in this block so that the predecessors don't need to do
+ %% an allocation on behalf of this block.
+ Is = case need_heap_never(Is0) of
+ true -> Is0;
+ false -> [#cg_alloc{}|Is0]
+ end,
+ Blk = Blk0#cg_blk{is=Is},
+ [{L,Blk}|need_heap_allocs(Bs, Counts)];
+ #{} ->
+ [{L,Blk0}|need_heap_allocs(Bs, Counts)]
+ end;
+need_heap_allocs([], _) -> [].
+
+need_heap_never([#cg_alloc{}|_]) -> true;
+need_heap_never([#cg_set{op=recv_next}|_]) -> true;
+need_heap_never([#cg_set{op=wait}|_]) -> true;
+need_heap_never(_) -> false.
+
+need_heap_blks([{L,#cg_blk{is=Is0}=Blk0}|Bs], H0, Acc) ->
+ {Is1,H1} = need_heap_is(reverse(Is0), H0, []),
+ {Ns,H} = need_heap_terminator(Bs, H1),
+ Is = Ns ++ Is1,
+ Blk = Blk0#cg_blk{is=Is},
+ need_heap_blks(Bs, H, [{L,Blk}|Acc]);
+need_heap_blks([], H, Acc) ->
+ {Acc,H}.
+
+need_heap_is([#cg_alloc{words=Words}=Alloc0|Is], N, Acc) ->
+ Alloc = Alloc0#cg_alloc{words=add_heap_words(N, Words)},
+ need_heap_is(Is, #need{}, [Alloc|Acc]);
+need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) ->
+ case classify_heap_need(Op, Args) of
+ {put,Words} ->
+ %% Pass through adding to needed heap.
+ need_heap_is(Is, add_heap_words(N, Words), [I|Acc]);
+ put_float ->
+ need_heap_is(Is, add_heap_float(N), [I|Acc]);
+ neutral ->
+ need_heap_is(Is, N, [I|Acc]);
+ gc ->
+ need_heap_is(Is, #need{}, [I]++need_heap_need(N)++Acc)
+ end;
+need_heap_is([], N, Acc) ->
+ {Acc,N}.
+
+need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=Same,fail=Same}}}|_], N) ->
+ {[],N};
+need_heap_terminator([{_,#cg_blk{}}|_], N) ->
+ {need_heap_need(N),#need{}};
+need_heap_terminator([], H) ->
+ {need_heap_need(H),#need{}}.
+
+need_heap_need(#need{h=0,f=0}) -> [];
+need_heap_need(#need{}=N) -> [#cg_alloc{words=N}].
+
+add_heap_words(#need{h=H1,f=F1}, #need{h=H2,f=F2}) ->
+ #need{h=H1+H2,f=F1+F2};
+add_heap_words(#need{h=Heap}=N, Words) when is_integer(Words) ->
+ N#need{h=Heap+Words}.
+
+add_heap_float(#need{f=F}=N) ->
+ N#need{f=F+1}.
+
+%% classify_heap_need(Operation, Arguments) ->
+%% gc | neutral | {put,Words} | put_float.
+%% Classify the heap need for this instruction. The return
+%% values have the following meaning.
+%%
+%% {put,Words} means that the instruction will use Words words to build
+%% something on the heap.
+%%
+%% 'put_float' means that the instruction will build one floating point
+%% number on the heap.
+%%
+%% 'gc' means that that the instruction can potentially do a GC or throw an
+%% exception. That means that an allocation instruction for any building
+%% must be placed after this instruction.
+%%
+%% 'neutral' means that the instruction does nothing to disturb the heap.
+
+-spec classify_heap_need(beam_ssa:op(), [beam_ssa:value()]) ->
+ 'gc' | 'neutral' |
+ {'put',non_neg_integer()} | 'put_float'.
+
+classify_heap_need(put_list, _) ->
+ {put,2};
+classify_heap_need(put_tuple_arity, [#b_literal{val=Words}]) ->
+ {put,Words+1};
+classify_heap_need({bif,Name}, Args) ->
+ case is_gc_bif(Name, Args) of
+ false -> neutral;
+ true -> gc
+ end;
+classify_heap_need({float,Op}, _Args) ->
+ case Op of
+ get -> put_float;
+ _ -> neutral
+ end;
+classify_heap_need(Name, _Args) ->
+ classify_heap_need(Name).
+
+%% classify_heap_need(Operation) -> gc | neutral.
+%% Return either 'gc' or 'neutral'.
+%%
+%% 'gc' means that that the instruction can potentially do a GC or throw an
+%% exception. That means that an allocation instruction for any building
+%% must be placed after this instruction.
+%%
+%% 'neutral' means that the instruction does nothing to disturb the heap.
+%%
+%% Note: Only handle operations in this function that are not handled
+%% by classify_heap_need/2.
+
+classify_heap_need(bs_add) -> gc;
+classify_heap_need(bs_get) -> gc;
+classify_heap_need(bs_init) -> gc;
+classify_heap_need(bs_init_writable) -> gc;
+classify_heap_need(bs_match_string) -> gc;
+classify_heap_need(bs_put) -> neutral;
+classify_heap_need(bs_restore) -> neutral;
+classify_heap_need(bs_save) -> neutral;
+classify_heap_need(bs_skip) -> gc;
+classify_heap_need(bs_start_match) -> neutral;
+classify_heap_need(bs_test_tail) -> neutral;
+classify_heap_need(bs_utf16_size) -> neutral;
+classify_heap_need(bs_utf8_size) -> neutral;
+classify_heap_need(build_stacktrace) -> gc;
+classify_heap_need(call) -> gc;
+classify_heap_need(catch_end) -> gc;
+classify_heap_need(context_to_binary) -> gc;
+classify_heap_need(copy) -> neutral;
+classify_heap_need(extract) -> gc;
+classify_heap_need(get_hd) -> neutral;
+classify_heap_need(get_map_element) -> neutral;
+classify_heap_need(get_tl) -> neutral;
+classify_heap_need(get_tuple_element) -> neutral;
+classify_heap_need(has_map_field) -> neutral;
+classify_heap_need(is_nonempty_list) -> neutral;
+classify_heap_need(is_tagged_tuple) -> neutral;
+classify_heap_need(kill_try_tag) -> gc;
+classify_heap_need(landingpad) -> gc;
+classify_heap_need(make_fun) -> gc;
+classify_heap_need(new_try_tag) -> gc;
+classify_heap_need(peek_message) -> gc;
+classify_heap_need(put_map) -> gc;
+classify_heap_need(put_tuple_elements) -> neutral;
+classify_heap_need(raw_raise) -> gc;
+classify_heap_need(recv_next) -> gc;
+classify_heap_need(remove_message) -> neutral;
+classify_heap_need(resume) -> gc;
+classify_heap_need(set_tuple_element) -> gc;
+classify_heap_need(succeeded) -> neutral;
+classify_heap_need(timeout) -> gc;
+classify_heap_need(wait) -> gc;
+classify_heap_need(wait_timeout) -> gc.
+
+%%%
+%%% Because beam_ssa_pre_codegen has inserted 'copy' instructions to copy
+%%% variables that must be saved on the stack, a value can for some time
+%%% be in both an X register and a Y register.
+%%%
+%%% Here we will keep track of variables that have the same value and
+%%% rewrite instructions to use the variable that refers to the X
+%%% register instead of the Y register. That could improve performance,
+%%% since the BEAM interpreter have more optimized instructions
+%%% operating on X registers than on Y registers.
+%%%
+%%% 'call' and 'make_fun' are handled somewhat specially. If a value
+%%% already is in the correct X register, the X register will always
+%%% be used instead of the Y register. However, if there are one or more
+%%% values in the wrong X registers, the X registers variables will be
+%%% used only if that does not cause more 'move' instructions to be
+%%% be emitted than if the Y register variables were used.
+%%%
+%%% Here are some examples. The first example shows how a 'move' from
+%%% an Y register is eliminated:
+%%%
+%%% move x0 y1
+%%% move y1 x0 %%Will be eliminated.
+%%%
+%%% call f/1
+%%%
+%%% Here is an example when x0 and x1 must be swapped to load the argument
+%%% registers. Here the 'call' instruction will use the Y registers to
+%%% avoid introducing an extra 'move' insruction:
+%%%
+%%% move x0 y0
+%%% move x1 y1
+%%%
+%%% move y0 x1
+%%% move y1 x0
+%%%
+%%% call f/2
+%%%
+%%% Using the X register to load the argument registers would need
+%%% an extra 'move' instruction like this:
+%%%
+%%% move x0 y0
+%%% move x1 y1
+%%%
+%%% move x1 x2
+%%% move x0 x1
+%%% move x2 x0
+%%%
+%%% call f/2
+%%%
+
+prefer_xregs(Linear, St) ->
+ prefer_xregs(Linear, St, #{0=>#{}}).
+
+prefer_xregs([{L,#cg_blk{is=Is0,last=Last0}=Blk0}|Bs], St, Map0) ->
+ Copies0 = maps:get(L, Map0),
+ {Is,Copies} = prefer_xregs_is(Is0, St, Copies0, []),
+ Last = prefer_xregs_terminator(Last0, Copies, St),
+ Blk = Blk0#cg_blk{is=Is,last=Last},
+ Successors = successors(Last),
+ Map = prefer_xregs_successors(Successors, Copies, Map0),
+ [{L,Blk}|prefer_xregs(Bs, St, Map)];
+prefer_xregs([], _St, _Map) -> [].
+
+prefer_xregs_successors([L|Ls], Copies0, Map0) ->
+ case Map0 of
+ #{L:=Copies1} ->
+ Copies = merge_copies(Copies0, Copies1),
+ Map = Map0#{L:=Copies},
+ prefer_xregs_successors(Ls, Copies0, Map);
+ #{} ->
+ Map = Map0#{L=>Copies0},
+ prefer_xregs_successors(Ls, Copies0, Map)
+ end;
+prefer_xregs_successors([], _, Map) -> Map.
+
+prefer_xregs_is([#cg_alloc{}=I|Is], St, Copies0, Acc) ->
+ Copies = case I of
+ #cg_alloc{stack=none,words=#need{h=0,f=0}} ->
+ Copies0;
+ #cg_alloc{} ->
+ #{}
+ end,
+ prefer_xregs_is(Is, St, Copies, [I|Acc]);
+prefer_xregs_is([#cg_set{op=copy,dst=Dst,args=[Src]}=I|Is], St, Copies0, Acc) ->
+ Copies1 = prefer_xregs_prune(I, Copies0, St),
+ Copies = case beam_args([Src,Dst], St) of
+ [Same,Same] -> Copies1;
+ [_,_] -> Copies1#{Dst=>Src}
+ end,
+ prefer_xregs_is(Is, St, Copies, [I|Acc]);
+prefer_xregs_is([#cg_set{op=call,dst=Dst}=I0|Is], St, Copies, Acc) ->
+ I = prefer_xregs_call(I0, Copies, St),
+ prefer_xregs_is(Is, St, #{Dst=>{x,0}}, [I|Acc]);
+prefer_xregs_is([#cg_set{op=make_fun,dst=Dst}=I0|Is], St, Copies, Acc) ->
+ I = prefer_xregs_call(I0, Copies, St),
+ prefer_xregs_is(Is, St, #{Dst=>{x,0}}, [I|Acc]);
+prefer_xregs_is([#cg_set{op=set_tuple_element}=I|Is], St, Copies, Acc) ->
+ %% FIXME: HiPE translates the following code segment incorrectly:
+ %% {call_ext,3,{extfunc,erlang,setelement,3}}.
+ %% {move,{x,0},{y,3}}.
+ %% {set_tuple_element,{y,1},{y,3},1}.
+ %% Therefore, skip the translation of the arguments for set_tuple_element.
+ prefer_xregs_is(Is, St, Copies, [I|Acc]);
+prefer_xregs_is([#cg_set{args=Args0}=I0|Is], St, Copies0, Acc) ->
+ Args = [do_prefer_xreg(A, Copies0, St) || A <- Args0],
+ I = I0#cg_set{args=Args},
+ Copies = prefer_xregs_prune(I, Copies0, St),
+ prefer_xregs_is(Is, St, Copies, [I|Acc]);
+prefer_xregs_is([], _St, Copies, Acc) ->
+ {reverse(Acc),Copies}.
+
+prefer_xregs_terminator(#cg_br{bool=Arg0}=I, Copies, St) ->
+ Arg = do_prefer_xreg(Arg0, Copies, St),
+ I#cg_br{bool=Arg};
+prefer_xregs_terminator(#cg_ret{arg=Arg0}=I, Copies, St) ->
+ Arg = do_prefer_xreg(Arg0, Copies, St),
+ I#cg_ret{arg=Arg};
+prefer_xregs_terminator(#cg_switch{arg=Arg0}=I, Copies, St) ->
+ Arg = do_prefer_xreg(Arg0, Copies, St),
+ I#cg_switch{arg=Arg}.
+
+prefer_xregs_prune(#cg_set{anno=#{clobbers:=true}}, _, _) ->
+ #{};
+prefer_xregs_prune(#cg_set{dst=Dst}, Copies, St) ->
+ DstReg = beam_arg(Dst, St),
+ F = fun(_, Alias) ->
+ beam_arg(Alias, St) =/= DstReg
+ end,
+ maps:filter(F, Copies).
+
+%% prefer_xregs_call(Instruction, Copies, St) -> Instruction.
+%% Given a 'call' or 'make_fun' instruction, minimize the number
+%% of 'move' instructions to set up the argument registers.
+%% Prefer using X registers over Y registers, unless that will
+%% result in more 'move' instructions.
+
+prefer_xregs_call(#cg_set{args=[_]}=I, _Copies, _St) ->
+ I;
+prefer_xregs_call(#cg_set{args=[F|Args0]}=I, Copies, St) ->
+ case Args0 of
+ [A0] ->
+ %% Only one argument. Always prefer the X register
+ %% if available.
+ A = do_prefer_xreg(A0, Copies, St),
+ I#cg_set{args=[F,A]};
+ [_|_] ->
+ %% Two or more arguments. Try rewriting arguments in
+ %% two ways and see which way produces the least
+ %% number of 'move' instructions.
+ Args1 = prefer_xregs_call_1(Args0, Copies, 0, St),
+ Args2 = [do_prefer_xreg(A, Copies, St) || A <- Args0],
+ case {count_moves(Args1, St),count_moves(Args2, St)} of
+ {N1,N2} when N1 < N2 ->
+ %% There will be fewer 'move' instructions if
+ %% we keep using Y registers.
+ I#cg_set{args=[F|Args1]};
+ {_,_} ->
+ %% Always use the values in X registers.
+ I#cg_set{args=[F|Args2]}
+ end
+ end.
+
+count_moves(Args, St) ->
+ length(setup_args(beam_args(Args, St))).
+
+prefer_xregs_call_1([#b_var{}=A|As], Copies, X, St) ->
+ case {beam_arg(A, St),Copies} of
+ {{y,_},#{A:=Other}} ->
+ case beam_arg(Other, St) of
+ {x,X} ->
+ %% This value is already in the correct X register.
+ %% It is always benefical to use the X register variable.
+ [Other|prefer_xregs_call_1(As, Copies, X+1, St)];
+ _ ->
+ %% This value is another X register. Keep using
+ %% the Y register variable.
+ [A|prefer_xregs_call_1(As, Copies, X+1, St)]
+ end;
+ {_,_} ->
+ %% The value is not available in an X register.
+ [A|prefer_xregs_call_1(As, Copies, X+1, St)]
+ end;
+prefer_xregs_call_1([A|As], Copies, X, St) ->
+ [A|prefer_xregs_call_1(As, Copies, X+1, St)];
+prefer_xregs_call_1([], _, _, _) -> [].
+
+do_prefer_xreg(#b_var{}=A, Copies, St) ->
+ case {beam_arg(A, St),Copies} of
+ {{y,_},#{A:=Copy}} ->
+ Copy;
+ {_,_} ->
+ A
+ end;
+do_prefer_xreg(A, _, _) -> A.
+
+merge_copies(Copies0, Copies1) when map_size(Copies0) =< map_size(Copies1) ->
+ maps:filter(fun(K, V) ->
+ case Copies1 of
+ #{K:=V} -> true;
+ #{} -> false
+ end
+ end, Copies0);
+merge_copies(Copies0, Copies1) ->
+ merge_copies(Copies1, Copies0).
+
+
+%%%
+%%% Add annotations for the number of live registers.
+%%%
+
+liveness(Linear, #cg{regs=Regs}) ->
+ liveness(reverse(Linear), #{}, Regs, []).
+
+liveness([{L,#cg_blk{is=Is0,last=Last0}=Blk0}|Bs], LiveMap0, Regs, Acc) ->
+ Successors = liveness_successors(Last0),
+ Live0 = ordsets:union([liveness_get(S, LiveMap0) || S <- Successors]),
+ Live1 = liveness_terminator(Last0, Live0),
+ {Is,Live} = liveness_is(reverse(Is0), Regs, Live1, []),
+ LiveMap = LiveMap0#{L=>Live},
+ Blk = Blk0#cg_blk{is=Is},
+ liveness(Bs, LiveMap, Regs, [{L,Blk}|Acc]);
+liveness([], _LiveMap, _Regs, Acc) -> Acc.
+
+liveness_get(S, LiveMap) ->
+ case LiveMap of
+ #{S:=Live} -> Live;
+ #{} -> []
+ end.
+
+liveness_successors(Terminator) ->
+ successors(Terminator) -- [?BADARG_BLOCK].
+
+liveness_is([#cg_alloc{}=I0|Is], Regs, Live, Acc) ->
+ I = I0#cg_alloc{live=num_live(Live, Regs)},
+ liveness_is(Is, Regs, Live, [I|Acc]);
+liveness_is([#cg_set{dst=Dst0,args=Args}=I0|Is], Regs, Live0, Acc) ->
+ #b_var{name=Dst} = Dst0,
+ Live1 = liveness_clobber(I0, Live0, Regs),
+ I1 = liveness_yregs_anno(I0, Live1, Regs),
+ Live2 = liveness_args(Args, Live1),
+ Live = ordsets:del_element(Dst, Live2),
+ I = liveness_anno(I1, Live, Regs),
+ liveness_is(Is, Regs, Live, [I|Acc]);
+liveness_is([], _, Live, Acc) ->
+ {Acc,Live}.
+
+liveness_terminator(#cg_br{bool=Arg}, Live) ->
+ liveness_terminator_1(Arg, Live);
+liveness_terminator(#cg_switch{arg=Arg}, Live) ->
+ liveness_terminator_1(Arg, Live);
+liveness_terminator(#cg_ret{arg=Arg}, Live) ->
+ liveness_terminator_1(Arg, Live).
+
+liveness_terminator_1(#b_var{name=V}, Live) ->
+ ordsets:add_element(V, Live);
+liveness_terminator_1(#b_literal{}, Live) ->
+ Live;
+liveness_terminator_1(Reg, Live) ->
+ _ = verify_beam_register(Reg),
+ ordsets:add_element(Reg, Live).
+
+liveness_args([#b_var{name=V}|As], Live) ->
+ liveness_args(As, ordsets:add_element(V, Live));
+liveness_args([#b_remote{mod=Mod,name=Name}|As], Live) ->
+ liveness_args([Mod,Name|As], Live);
+liveness_args([A|As], Live) ->
+ case is_beam_register(A) of
+ true ->
+ liveness_args(As, ordsets:add_element(A, Live));
+ false ->
+ liveness_args(As, Live)
+ end;
+liveness_args([], Live) -> Live.
+
+liveness_anno(#cg_set{op=Op}=I, Live, Regs) ->
+ case need_live_anno(Op) of
+ true ->
+ NumLive = num_live(Live, Regs),
+ Anno = (I#cg_set.anno)#{live=>NumLive},
+ I#cg_set{anno=Anno};
+ false ->
+ I
+ end.
+
+liveness_yregs_anno(#cg_set{op=Op,dst=#b_var{name=Dst}}=I, Live0, Regs) ->
+ case need_live_anno(Op) of
+ true ->
+ Live = ordsets:del_element(Dst, Live0),
+ LiveYregs = [V || V <- Live, is_yreg(V, Regs)],
+ Anno = (I#cg_set.anno)#{live_yregs=>LiveYregs},
+ I#cg_set{anno=Anno};
+ false ->
+ I
+ end.
+
+liveness_clobber(#cg_set{anno=Anno}, Live, Regs) ->
+ case Anno of
+ #{clobbers:=true} ->
+ [R || R <- Live, is_yreg(R, Regs)];
+ _ ->
+ Live
+ end.
+
+is_yreg(R, Regs) ->
+ case Regs of
+ #{R:={y,_}} -> true;
+ #{} -> false
+ end.
+
+num_live(Live, Regs) ->
+ Rs = ordsets:from_list([get_register(V, Regs) || V <- Live]),
+ num_live_1(Rs, 0).
+
+num_live_1([{x,X}|T], X) ->
+ num_live_1(T, X+1);
+num_live_1([{x,_}|_]=T, X) ->
+ %% error({hole,{x,X},expected,Next});
+ num_live_1(T, X+1);
+num_live_1([{y,_}|_], X) ->
+ X;
+num_live_1([{z,_}|_], X) ->
+ X;
+num_live_1([{fr,_}|T], X) ->
+ num_live_1(T, X);
+num_live_1([], X) ->
+ X.
+
+get_live(#cg_set{anno=#{live:=Live}}) ->
+ Live.
+
+%% need_live_anno(Operation) -> true|false.
+%% Return 'true' if the instruction needs a 'live' annotation with
+%% the number live X registers, or 'false' otherwise.
+
+need_live_anno(Op) ->
+ case Op of
+ {bif,_} -> true;
+ bs_get -> true;
+ bs_init -> true;
+ bs_start_match -> true;
+ bs_skip -> true;
+ call -> true;
+ put_map -> true;
+ _ -> false
+ end.
+
+%%%
+%%% Add annotations for defined Y registers.
+%%%
+
+defined(Linear, #cg{regs=Regs}) ->
+ def(Linear, #{}, Regs).
+
+def([{L,#cg_blk{is=Is0,last=Last}=Blk0}|Bs], DefMap0, Regs) ->
+ Def0 = def_get(L, DefMap0),
+ {Is,Def} = def_is(Is0, Regs, Def0, []),
+ Successors = successors(Last),
+ DefMap = def_successors(Successors, Def, DefMap0),
+ Blk = Blk0#cg_blk{is=Is},
+ [{L,Blk}|def(Bs, DefMap, Regs)];
+def([], _, _) -> [].
+
+def_get(L, DefMap) ->
+ case DefMap of
+ #{L:=Def} -> Def;
+ #{} -> []
+ end.
+
+def_is([#cg_alloc{anno=Anno0}=I0|Is], Regs, Def, Acc) ->
+ I = I0#cg_alloc{anno=Anno0#{def_yregs=>Def}},
+ def_is(Is, Regs, Def, [I|Acc]);
+def_is([#cg_set{op=kill_try_tag,args=[#b_var{name=Tag}]}=I|Is], Regs, Def0, Acc) ->
+ Def = ordsets:del_element(Tag, Def0),
+ def_is(Is, Regs, Def, [I|Acc]);
+def_is([#cg_set{op=catch_end,args=[#b_var{name=Tag}|_]}=I|Is], Regs, Def0, Acc) ->
+ Def = ordsets:del_element(Tag, Def0),
+ def_is(Is, Regs, Def, [I|Acc]);
+def_is([#cg_set{anno=Anno0,op=call,dst=#b_var{name=Dst}}=I0|Is],
+ Regs, Def0, Acc) ->
+ #{live_yregs:=LiveYregVars} = Anno0,
+ LiveRegs = gb_sets:from_list([maps:get(V, Regs) || V <- LiveYregVars]),
+ Kill0 = ordsets:subtract(Def0, LiveYregVars),
+
+ %% Kill0 is the set of variables that have just died. However, the registers
+ %% used for killed variables may have been reused, so we must check that the
+ %% registers to be killed are not used by other variables.
+ Kill = [K || K <- Kill0, not gb_sets:is_element(maps:get(K, Regs), LiveRegs)],
+ Anno = Anno0#{def_yregs=>Def0,kill_yregs=>Kill},
+ I = I0#cg_set{anno=Anno},
+ Def1 = ordsets:subtract(Def0, Kill),
+ Def = def_add_yreg(Dst, Def1, Regs),
+ def_is(Is, Regs, Def, [I|Acc]);
+def_is([#cg_set{anno=Anno0,op={bif,Bif},dst=#b_var{name=Dst},args=Args}=I0|Is],
+ Regs, Def0, Acc) ->
+ Arity = length(Args),
+ I = case is_gc_bif(Bif, Args) orelse not erl_bifs:is_safe(erlang, Bif, Arity) of
+ true ->
+ I0#cg_set{anno=Anno0#{def_yregs=>Def0}};
+ false ->
+ I0
+ end,
+ Def = def_add_yreg(Dst, Def0, Regs),
+ def_is(Is, Regs, Def, [I|Acc]);
+def_is([#cg_set{anno=Anno0,dst=#b_var{name=Dst}}=I0|Is], Regs, Def0, Acc) ->
+ I = case need_y_init(I0) of
+ true ->
+ I0#cg_set{anno=Anno0#{def_yregs=>Def0}};
+ false ->
+ I0
+ end,
+ Def = def_add_yreg(Dst, Def0, Regs),
+ def_is(Is, Regs, Def, [I|Acc]);
+def_is([], _, Def, Acc) ->
+ {reverse(Acc),Def}.
+
+def_add_yreg(Dst, Def, Regs) ->
+ case is_yreg(Dst, Regs) of
+ true -> ordsets:add_element(Dst, Def);
+ false -> Def
+ end.
+
+def_successors([S|Ss], Def0, DefMap) ->
+ case DefMap of
+ #{S:=Def1} ->
+ Def = ordsets:intersection(Def0, Def1),
+ def_successors(Ss, Def0, DefMap#{S:=Def});
+ #{} ->
+ def_successors(Ss, Def0, DefMap#{S=>Def0})
+ end;
+def_successors([], _, DefMap) -> DefMap.
+
+%% need_y_init(#cg_set{}) -> true|false.
+%% Return true if this instructions needs initialized Y registers
+%% (because the instruction may do a GC or cause an exception
+%% so that the stack will be scanned), or false otherwise.
+
+need_y_init(#cg_set{anno=#{clobbers:=Clobbers}}) -> Clobbers;
+need_y_init(#cg_set{op=bs_get}) -> true;
+need_y_init(#cg_set{op=bs_init}) -> true;
+need_y_init(#cg_set{op=bs_skip,args=[#b_literal{val=Type}|_]}) ->
+ case Type of
+ utf8 -> true;
+ utf16 -> true;
+ utf32 -> true;
+ _ -> false
+ end;
+need_y_init(#cg_set{op=bs_start_match}) -> true;
+need_y_init(#cg_set{op=put_map}) -> true;
+need_y_init(#cg_set{}) -> false.
+
+%% opt_allocate([{BlockLabel,Block}], #st{}) -> [BeamInstruction].
+%% Update the def_yregs field of each #cg_alloc{} that allocates
+%% a stack frame. #cg_alloc.def_yregs will list all Y registers
+%% that will be initialized by the subsequent code (thus, the
+%% listed Y registers don't require init/1 instructions).
+
+opt_allocate(Linear, #cg{regs=Regs}) ->
+ opt_allocate_1(Linear, Regs).
+
+opt_allocate_1([{L,#cg_blk{is=[#cg_alloc{stack=Stk}=I0|Is]}=Blk0}|Bs]=Bs0, Regs)
+ when is_integer(Stk) ->
+ Yregs = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
+ I = I0#cg_alloc{def_yregs=Yregs},
+ [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+opt_allocate_1([B|Bs], Regs) ->
+ [B|opt_allocate_1(Bs, Regs)];
+opt_allocate_1([], _) -> [].
+
+opt_alloc_def([{L,#cg_blk{is=Is,last=Last}}|Bs], Ws0, Def0) ->
+ case gb_sets:is_member(L, Ws0) of
+ false ->
+ opt_alloc_def(Bs, Ws0, Def0);
+ true ->
+ case opt_allocate_is(Is) of
+ none ->
+ Succ = successors(Last),
+ Ws = gb_sets:union(Ws0, gb_sets:from_list(Succ)),
+ opt_alloc_def(Bs, Ws, Def0);
+ Def1 when is_list(Def1) ->
+ Def = [Def1|Def0],
+ opt_alloc_def(Bs, Ws0, Def)
+ end
+ end;
+opt_alloc_def([], _, Def) ->
+ ordsets:intersection(Def).
+
+opt_allocate_is([#cg_set{anno=Anno}|Is]) ->
+ case Anno of
+ #{def_yregs:=Yregs} ->
+ Yregs;
+ #{} ->
+ opt_allocate_is(Is)
+ end;
+opt_allocate_is([#cg_alloc{anno=#{def_yregs:=Yregs},stack=none}|_]) ->
+ Yregs;
+opt_allocate_is([#cg_alloc{}|Is]) ->
+ opt_allocate_is(Is);
+opt_allocate_is([]) -> none.
+
+%%%
+%%% Here follows the main code generation functions.
+%%%
+
+%% cg_linear([{BlockLabel,Block}]) -> [BeamInstruction].
+%% Generate BEAM instructions.
+
+cg_linear([{L,#cg_blk{anno=#{recv_set:=L}=Anno0}=B0}|Bs], St0) ->
+ Anno = maps:remove(recv_set, Anno0),
+ B = B0#cg_blk{anno=Anno},
+ {Is,St1} = cg_linear([{L,B}|Bs], St0),
+ {Fail,St} = use_block_label(L, St1),
+ {[{recv_set,Fail}|Is],St};
+cg_linear([{L,#cg_blk{is=Is0,last=Last}}|Bs], St0) ->
+ Next = next_block(Bs),
+ St1 = new_block_label(L, St0),
+ {Is1,St2} = cg_block(Is0, Last, Next, St1),
+ {Is2,St} = cg_linear(Bs, St2),
+ {def_block_label(L, St)++Is1++Is2,St};
+cg_linear([], St) -> {[],St}.
+
+cg_block([#cg_set{op=recv_next}], #cg_br{succ=Lr0}, _Next, St0) ->
+ {Lr,St} = use_block_label(Lr0, St0),
+ {[{loop_rec_end,Lr}],St};
+cg_block([#cg_set{op=wait}], #cg_br{succ=Lr0}, _Next, St0) ->
+ {Lr,St} = use_block_label(Lr0, St0),
+ {[{wait,Lr}],St};
+cg_block(Is0, Last, Next, St0) ->
+ case Last of
+ #cg_br{succ=Next,fail=Next} ->
+ cg_block(Is0, none, St0);
+ #cg_br{succ=Same,fail=Same} ->
+ {Fail,St1} = use_block_label(Same, St0),
+ {Is,St} = cg_block(Is0, none, St1),
+ {Is++[jump(Fail)],St};
+ #cg_br{bool=Bool,succ=Next,fail=Fail0} ->
+ {Fail,St1} = use_block_label(Fail0, St0),
+ {Is,St} = cg_block(Is0, {Bool,Fail}, St1),
+ {Is,St};
+ #cg_br{bool=Bool,succ=Succ0,fail=Fail0} ->
+ {[Succ,Fail],St1} = use_block_labels([Succ0,Fail0], St0),
+ {Is,St} = cg_block(Is0, {Bool,Fail}, St1),
+ {Is++[jump(Succ)],St};
+ #cg_ret{arg=Src0,dealloc=N} ->
+ Src = beam_arg(Src0, St0),
+ cg_block(Is0, {return,Src,N}, St0);
+ #cg_switch{} ->
+ cg_switch(Is0, Last, St0)
+ end.
+
+cg_switch(Is0, Last, St0) ->
+ #cg_switch{arg=Src0,fail=Fail0,list=List0} = Last,
+ Src = beam_arg(Src0, St0),
+ {Fail1,St1} = use_block_label(Fail0, St0),
+ Fail = ensure_label(Fail1, St1),
+ {List1,St2} =
+ flatmapfoldl(fun({V,L}, S0) ->
+ {Lbl,S} = use_block_label(L, S0),
+ {[beam_arg(V, S),Lbl],S}
+ end, St1, List0),
+ {Is1,St} = cg_block(Is0, none, St2),
+ case reverse(Is1) of
+ [{bif,tuple_size,_,[Tuple],{z,_}=Src}|More] ->
+ List = map(fun({integer,Arity}) -> Arity;
+ ({f,_}=F) -> F
+ end, List1),
+ Is = reverse(More, [{select_tuple_arity,Tuple,Fail,{list,List}}]),
+ {Is,St};
+ _ ->
+ SelectVal = {select_val,Src,Fail,{list,List1}},
+ {Is1 ++ [SelectVal],St}
+ end.
+
+jump({f,_}=Fail) ->
+ {jump,Fail};
+jump({catch_tag,Fail}) ->
+ {jump,Fail}.
+
+bif_fail({f,_}=Fail) -> Fail;
+bif_fail({catch_tag,_}) -> {f,0}.
+
+next_block([]) -> none;
+next_block([{Next,_}|_]) -> Next.
+
+ensure_label(Fail0, #cg{ultimate_fail=Lbl}) ->
+ case bif_fail(Fail0) of
+ {f,0} -> {f,Lbl};
+ {f,_}=Fail -> Fail
+ end.
+
+cg_block([#cg_set{anno=#{recv_mark:=L}=Anno0}=I0|T], Context, St0) ->
+ Anno = maps:remove(recv_mark, Anno0),
+ I = I0#cg_set{anno=Anno},
+ {Is,St1} = cg_block([I|T], Context, St0),
+ {Fail,St} = use_block_label(L, St1),
+ {[{recv_mark,Fail}|Is],St};
+cg_block([#cg_set{op=new_try_tag,dst=Tag,args=Args}], {Tag,Fail0}, St) ->
+ {catch_tag,Fail} = Fail0,
+ [Reg,{atom,Kind}] = beam_args([Tag|Args], St),
+ {[{Kind,Reg,Fail}],St};
+cg_block([#cg_set{anno=Anno,op={bif,Name},dst=Dst0,args=Args0}=I,
+ #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) ->
+ [Dst|Args] = beam_args([Dst0|Args0], St),
+ Line0 = call_line(body, {extfunc,erlang,Name,length(Args)}, Anno),
+ Fail = bif_fail(Fail0),
+ Line = case Fail of
+ {f,0} -> Line0;
+ {f,_} -> []
+ end,
+ case is_gc_bif(Name, Args) of
+ true ->
+ Live = get_live(I),
+ Kill = kill_yregs(Anno, St),
+ {Kill++Line++[{gc_bif,Name,Fail,Live,Args,Dst}],St};
+ false ->
+ {Line++[{bif,Name,Fail,Args,Dst}],St}
+ end;
+cg_block([#cg_set{op={bif,tuple_size},dst=Arity0,args=[Tuple0]},
+ #cg_set{op={bif,'=:='},dst=Bool,args=[Arity0,#b_literal{val=Ar}]}=Eq],
+ {Bool,Fail}=Context, St0) ->
+ Tuple = beam_arg(Tuple0, St0),
+ case beam_arg(Arity0, St0) of
+ {z,_} ->
+ %% The size will only be used once. Combine to a test_arity instruction.
+ Test = {test,test_arity,ensure_label(Fail, St0),[Tuple,Ar]},
+ {[Test],St0};
+ Arity ->
+ %% The size will be used more than once. Must do an explicit
+ %% BIF call followed by the '==' test.
+ TupleSize = {bif,tuple_size,{f,0},[Tuple],Arity},
+ {Is,St} = cg_block([Eq], Context, St0),
+ {[TupleSize|Is],St}
+ end;
+cg_block([#cg_set{op={bif,Name},dst=Dst0,args=Args0}]=Is0, {Dst0,Fail}, St0) ->
+ [Dst|Args] = beam_args([Dst0|Args0], St0),
+ case Dst of
+ {z,_} ->
+ %% The result of the BIF call will only be used once. Convert to
+ %% a test instruction.
+ Test = bif_to_test(Name, Args, ensure_label(Fail, St0)),
+ {Test,St0};
+ _ ->
+ %% Must explicitly call the BIF since the result will be used
+ %% more than once.
+ {Is1,St1} = cg_block(Is0, none, St0),
+ {Is2,St} = cg_block([], {Dst0,Fail}, St1),
+ {Is1++Is2,St}
+ end;
+cg_block([#cg_set{anno=Anno,op={bif,Name},dst=Dst0,args=Args0}=I|T],
+ Context, St0) ->
+ [Dst|Args] = beam_args([Dst0|Args0], St0),
+ {Is0,St} = cg_block(T, Context, St0),
+ case is_gc_bif(Name, Args) of
+ true ->
+ Line = call_line(body, {extfunc,erlang,Name,length(Args)}, Anno),
+ Live = get_live(I),
+ Kill = kill_yregs(Anno, St),
+ Is = Kill++Line++[{gc_bif,Name,{f,0},Live,Args,Dst}|Is0],
+ {Is,St};
+ false ->
+ Is = [{bif,Name,{f,0},Args,Dst}|Is0],
+ {Is,St}
+ end;
+cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I,
+ #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) ->
+ Fail = bif_fail(Fail0),
+ Line = line(Anno),
+ [#b_literal{val=Kind}|Args1] = Args0,
+ case Kind of
+ new ->
+ [Dst,Size,{integer,Unit}] = beam_args([Dst0|Args1], St),
+ Live = get_live(I),
+ {[Line|cg_bs_init(Dst, Size, Unit, Live, Fail)],St};
+ private_append ->
+ [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St),
+ Flags = {field_flags,[]},
+ Is = [Line,{bs_private_append,Fail,Bits,Unit,Src,Flags,Dst}],
+ {Is,St};
+ append ->
+ [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St),
+ Flags = {field_flags,[]},
+ Live = get_live(I),
+ Is = [Line,{bs_append,Fail,Bits,0,Live,Unit,Src,Flags,Dst}],
+ {Is,St}
+ end;
+cg_block([#cg_set{anno=Anno,op=bs_start_match,dst=Ctx0,args=[Bin0]}=I,
+ #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
+ #{num_slots:=Slots} = Anno,
+ [Dst,Bin1] = beam_args([Ctx0,Bin0], St),
+ {Bin,Pre} = force_reg(Bin1, Dst),
+ Live = get_live(I),
+ Is = Pre ++ [{test,bs_start_match2,Fail,Live,[Bin,Slots],Dst}],
+ {Is,St};
+cg_block([#cg_set{op=bs_get}=Set,
+ #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
+ {cg_bs_get(Fail, Set, St),St};
+cg_block([#cg_set{op=bs_match_string,args=[CtxVar,#b_literal{val=String}]},
+ #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
+ CtxReg = beam_arg(CtxVar, St),
+ Is = [{test,bs_match_string,Fail,[CtxReg,String]}],
+ {Is,St};
+cg_block([#cg_set{dst=Dst0,op=landingpad,args=Args0}|T], Context, St0) ->
+ [Dst,{atom,Kind},Tag] = beam_args([Dst0|Args0], St0),
+ case Kind of
+ 'catch' ->
+ cg_catch(Dst, T, Context, St0);
+ 'try' ->
+ cg_try(Dst, Tag, T, Context, St0)
+ end;
+cg_block([#cg_set{op=kill_try_tag,args=Args0}|Is], Context, St0) ->
+ [Reg] = beam_args(Args0, St0),
+ {Is0,St} = cg_block(Is, Context, St0),
+ {[{try_end,Reg}|Is0],St};
+cg_block([#cg_set{op=catch_end,dst=Dst0,args=Args0}|Is], Context, St0) ->
+ [Dst,Reg,{x,0}] = beam_args([Dst0|Args0], St0),
+ {Is0,St} = cg_block(Is, Context, St0),
+ {[{catch_end,Reg}|copy({x,0}, Dst)++Is0],St};
+cg_block([#cg_set{op=call}=I,
+ #cg_set{op=succeeded,dst=Bool}], {Bool,_Fail}, St) ->
+ %% A call in try/catch block.
+ cg_block([I], none, St);
+cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=I,
+ #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
+ [Dst|Args] = beam_args([Dst0|Args0], St),
+ {cg_test(Op, bif_fail(Fail), Args, Dst, I),St};
+cg_block([#cg_set{op=bs_put,dst=Bool,args=Args0}], {Bool,Fail}, St) ->
+ Args = beam_args(Args0, St),
+ {cg_bs_put(bif_fail(Fail), Args),St};
+cg_block([#cg_set{op=bs_test_tail,dst=Bool,args=Args0}], {Bool,Fail}, St) ->
+ [Ctx,{integer,Bits}] = beam_args(Args0, St),
+ {[{test,bs_test_tail2,bif_fail(Fail),[Ctx,Bits]}],St};
+cg_block([#cg_set{op={float,checkerror},dst=Bool}], {Bool,Fail}, St) ->
+ {[{fcheckerror,bif_fail(Fail)}],St};
+cg_block([#cg_set{op=is_tagged_tuple,dst=Bool,args=Args0}], {Bool,Fail}, St) ->
+ [Src,{integer,Arity},Tag] = beam_args(Args0, St),
+ {[{test,is_tagged_tuple,ensure_label(Fail, St),[Src,Arity,Tag]}],St};
+cg_block([#cg_set{op=is_nonempty_list,dst=Bool,args=Args0}], {Bool,Fail}, St) ->
+ Args = beam_args(Args0, St),
+ {[{test,is_nonempty_list,ensure_label(Fail, St),Args}],St};
+cg_block([#cg_set{op=has_map_field,dst=Bool,args=Args0}], {Bool,Fail}, St) ->
+ [Src,Key] = beam_args(Args0, St),
+ {[{test,has_map_fields,Fail,Src,{list,[Key]}}],St};
+cg_block([#cg_set{op=call}=Call], {_Bool,_Fail}=Context, St0) ->
+ {Is0,St1} = cg_call(Call, body, none, St0),
+ {Is1,St} = cg_block([], Context, St1),
+ {Is0++Is1,St};
+cg_block([#cg_set{op=call,dst=Dst0}=Call], Context, St) ->
+ Dst = beam_arg(Dst0, St),
+ case Context of
+ {return,Dst,_} ->
+ cg_call(Call, tail, Context, St);
+ _ ->
+ cg_call(Call, body, Context, St)
+ end;
+cg_block([#cg_set{op=call}=Call|T], Context, St0) ->
+ {Is0,St1} = cg_call(Call, body, none, St0),
+ {Is1,St} = cg_block(T, Context, St1),
+ {Is0++Is1,St};
+cg_block([#cg_set{op=make_fun,dst=Dst0,args=[Local|Args0]}|T],
+ Context, St0) ->
+ #b_local{name=#b_literal{val=Func},arity=Arity} = Local,
+ [Dst|Args] = beam_args([Dst0|Args0], St0),
+ {FuncLbl,St1} = local_func_label(Func, Arity, St0),
+ Is0 = setup_args(Args) ++
+ [{make_fun2,{f,FuncLbl},0,0,length(Args)}|copy({x,0}, Dst)],
+ {Is1,St} = cg_block(T, Context, St1),
+ {Is0++Is1,St};
+cg_block([#cg_set{op=copy}|_]=T0, Context, St0) ->
+ {Is0,T} = cg_copy(T0, St0),
+ {Is1,St} = cg_block(T, Context, St0),
+ Is = Is0 ++ Is1,
+ case is_call(T) of
+ {yes,Arity} ->
+ {opt_call_moves(Is, Arity),St};
+ no ->
+ {Is,St}
+ end;
+cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=Set], none, St) ->
+ [Dst|Args] = beam_args([Dst0|Args0], St),
+ Is = cg_instr(Op, Args, Dst, Set),
+ {Is,St};
+cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=Set|T], Context, St0) ->
+ [Dst|Args] = beam_args([Dst0|Args0], St0),
+ Is0 = cg_instr(Op, Args, Dst, Set),
+ {Is1,St} = cg_block(T, Context, St0),
+ {Is0++Is1,St};
+cg_block([#cg_alloc{}=Alloc|T], Context, St0) ->
+ Is0 = cg_alloc(Alloc, St0),
+ {Is1,St} = cg_block(T, Context, St0),
+ {Is0++Is1,St};
+cg_block([], {return,Arg,none}, St) ->
+ Is = copy(Arg, {x,0}) ++ [return],
+ {Is,St};
+cg_block([], {return,Arg,N}, St) ->
+ Is = copy(Arg, {x,0}) ++ [{deallocate,N},return],
+ {Is,St};
+cg_block([], none, St) ->
+ {[],St};
+cg_block([], {Bool0,Fail}, St) ->
+ [Bool] = beam_args([Bool0], St),
+ {[{test,is_eq_exact,Fail,[Bool,{atom,true}]}],St}.
+
+cg_copy(T0, St) ->
+ {Copies,T} = splitwith(fun(#cg_set{op=copy}) -> true;
+ (_) -> false
+ end, T0),
+ Moves0 = cg_copy_1(Copies, St),
+ Moves1 = [Move || {move,Src,Dst}=Move <- Moves0, Src =/= Dst],
+ Scratch = {x,1022},
+ Moves = order_moves(Moves1, Scratch),
+ {Moves,T}.
+
+cg_copy_1([#cg_set{dst=Dst0,args=Args}|T], St) ->
+ [Dst,Src] = beam_args([Dst0|Args], St),
+ Copies = cg_copy_1(T, St),
+ case keymember(Dst, 3, Copies) of
+ true ->
+ %% Will be overwritten. Don't generate a move instruction.
+ Copies;
+ false ->
+ [{move,Src,Dst}|Copies]
+ end;
+cg_copy_1([], _St) -> [].
+
+bif_to_test('not', [Var], Fail) ->
+ [{test,is_eq_exact,Fail,[Var,{atom,false}]}];
+bif_to_test(Name, Args, Fail) ->
+ [beam_utils:bif_to_test(Name, Args, Fail)].
+
+opt_call_moves(Is0, Arity) ->
+ {Moves0,Is} = splitwith(fun({move,_,_}) -> true;
+ (_) -> false
+ end, Is0),
+ Moves = opt_call_moves_1(Moves0, Arity),
+ Moves ++ Is.
+
+opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1,{move,Tmp,Dst}=M2|Is], Arity) ->
+ case is_killed(Tmp, Is, Arity) of
+ true ->
+ %% The X register Tmp is never used again. We can collapse
+ %% the two move instruction into one.
+ [{move,Src,Dst}|opt_call_moves_1(Is, Arity)];
+ false ->
+ [M1|opt_call_moves_1([M2|Is], Arity)]
+ end;
+opt_call_moves_1([M|Ms], Arity) ->
+ [M|opt_call_moves_1(Ms, Arity)];
+opt_call_moves_1([], _Arity) -> [].
+
+is_killed(R, [{move,R,_}|_], _) ->
+ false;
+is_killed(R, [{move,_,R}|_], _) ->
+ true;
+is_killed(R, [{move,_,_}|Is], Arity) ->
+ is_killed(R, Is, Arity);
+is_killed({x,X}, [], Arity) ->
+ X >= Arity.
+
+cg_alloc(#cg_alloc{stack=none,words=#need{h=0,f=0}}, _St) ->
+ [];
+cg_alloc(#cg_alloc{stack=none,words=Need,live=Live}, _St) ->
+ [{test_heap,alloc(Need),Live}];
+cg_alloc(#cg_alloc{stack=Stk,words=Need,live=Live,def_yregs=DefYregs},
+ #cg{regs=Regs}) when is_integer(Stk) ->
+ Alloc = alloc(Need),
+ All = [{y,Y} || Y <- lists:seq(0, Stk-1)],
+ Def = ordsets:from_list([maps:get(V, Regs) || V <- DefYregs]),
+ NeedInit = ordsets:subtract(All, Def),
+ NoZero = length(Def)*2 > Stk,
+ I = case {NoZero,Alloc} of
+ {true,0} -> {allocate,Stk,Live};
+ {true,_} -> {allocate_heap,Stk,Alloc,Live};
+ {false,0} -> {allocate_zero,Stk,Live};
+ {false,_} -> {allocate_heap_zero,Stk,Alloc,Live}
+ end,
+ [I|case NoZero of
+ true -> [{init,Y} || Y <- NeedInit];
+ false -> []
+ end].
+
+alloc(#need{h=Words,f=0}) ->
+ Words;
+alloc(#need{h=Words,f=Floats}) ->
+ {alloc,[{words,Words},{floats,Floats}]}.
+
+is_call([#cg_set{op=call,args=[#b_var{}|Args]}|_]) ->
+ {yes,1+length(Args)};
+is_call([#cg_set{op=call,args=[_|Args]}|_]) ->
+ {yes,length(Args)};
+is_call([#cg_set{op=make_fun,args=[_|Args]}|_]) ->
+ {yes,length(Args)};
+is_call(_) ->
+ no.
+
+cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=[#b_local{}=Func0|Args0]},
+ Where, Context, St0) ->
+ [Dst|Args] = beam_args([Dst0|Args0], St0),
+ #b_local{name=Name0,arity=Arity} = Func0,
+ {atom,Name} = beam_arg(Name0, St0),
+ {FuncLbl,St} = local_func_label(Name, Arity, St0),
+ Line = call_line(Where, local, Anno),
+ Call = build_call(call, Arity, {f,FuncLbl}, Context, Dst),
+ Is = setup_args(Args, Anno, Context, St) ++ Line ++ Call,
+ {Is,St};
+cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=[#b_remote{}=Func0|Args0]},
+ Where, Context, St) ->
+ [Dst|Args] = beam_args([Dst0|Args0], St),
+ #b_remote{mod=Mod0,name=Name0,arity=Arity} = Func0,
+ case {beam_arg(Mod0, St),beam_arg(Name0, St)} of
+ {{atom,Mod},{atom,Name}} ->
+ Func = {extfunc,Mod,Name,Arity},
+ Line = call_line(Where, Func, Anno),
+ Call = build_call(call_ext, Arity, Func, Context, Dst),
+ Is = setup_args(Args, Anno, Context, St) ++ Line ++ Call,
+ {Is,St};
+ {Mod,Name} ->
+ Apply = build_apply(Arity, Context, Dst),
+ Is = setup_args(Args++[Mod,Name], Anno, Context, St) ++
+ [line(Anno)] ++ Apply,
+ {Is,St}
+ end;
+cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=Args0},
+ Where, Context, St) ->
+ [Dst,Func|Args] = beam_args([Dst0|Args0], St),
+ Line = call_line(Where, Func, Anno),
+ Arity = length(Args),
+ Call = build_call(call_fun, Arity, Func, Context, Dst),
+ Is = setup_args(Args++[Func], Anno, Context, St) ++ Line ++ Call,
+ {Is,St}.
+
+build_call(call_fun, Arity, _Func, none, Dst) ->
+ [{call_fun,Arity}|copy({x,0}, Dst)];
+build_call(call_fun, Arity, _Func, {return,Dst,N}, Dst) when is_integer(N) ->
+ [{call_fun,Arity},{deallocate,N},return];
+build_call(call_fun, Arity, _Func, {return,Val,N}, _Dst) when is_integer(N) ->
+ [{call_fun,Arity},{move,Val,{x,0}},{deallocate,N},return];
+build_call(call_ext, 2, {extfunc,erlang,'!',2}, none, Dst) ->
+ [send|copy({x,0}, Dst)];
+build_call(call_ext, 2, {extfunc,erlang,'!',2}, {return,Dst,N}, Dst)
+ when is_integer(N) ->
+ [send,{deallocate,N},return];
+build_call(Prefix, Arity, Func, {return,Dst,none}, Dst) ->
+ I = case Prefix of
+ call -> call_only;
+ call_ext -> call_ext_only
+ end,
+ [{I,Arity,Func}];
+build_call(call_ext, Arity, {extfunc,Mod,Name,Arity}=Func, {return,_,none}, _Dst) ->
+ true = erl_bifs:is_exit_bif(Mod, Name, Arity), %Assertion.
+ [{call_ext_only,Arity,Func}];
+build_call(Prefix, Arity, Func, {return,Dst,N}, Dst) when is_integer(N) ->
+ I = case Prefix of
+ call -> call_last;
+ call_ext -> call_ext_last
+ end,
+ [{I,Arity,Func,N}];
+build_call(I, Arity, Func, {return,Val,N}, _Dst) when is_integer(N) ->
+ [{I,Arity,Func}|copy(Val, {x,0})++[{deallocate,N},return]];
+build_call(I, Arity, Func, none, Dst) ->
+ [{I,Arity,Func}|copy({x,0}, Dst)].
+
+build_apply(Arity, {return,Dst,N}, Dst) when is_integer(N) ->
+ [{apply_last,Arity,N}];
+build_apply(Arity, {return,Val,N}, _Dst) when is_integer(N) ->
+ [{apply,Arity}|copy(Val, {x,0})++[{deallocate,N},return]];
+build_apply(Arity, none, Dst) ->
+ [{apply,Arity}|copy({x,0}, Dst)].
+
+cg_instr(put_map, [{atom,assoc},SrcMap|Ss], Dst, Set) ->
+ Live = get_live(Set),
+ [{put_map_assoc,{f,0},SrcMap,Dst,Live,{list,Ss}}];
+cg_instr(Op, Args, Dst, _Set) ->
+ cg_instr(Op, Args, Dst).
+
+cg_instr(bs_init_writable, Args, Dst) ->
+ setup_args(Args) ++ [bs_init_writable|copy({x,0}, Dst)];
+cg_instr(bs_restore, [Ctx,Slot], _Dst) ->
+ case Slot of
+ {integer,N} ->
+ [{bs_restore2,Ctx,N}];
+ {atom,start} ->
+ [{bs_restore2,Ctx,Slot}]
+ end;
+cg_instr(bs_save, [Ctx,Slot], _Dst) ->
+ {integer,N} = Slot,
+ [{bs_save2,Ctx,N}];
+cg_instr(build_stacktrace, Args, Dst) ->
+ setup_args(Args) ++ [build_stacktrace|copy({x,0}, Dst)];
+cg_instr(context_to_binary, [Src], _Dst) ->
+ [{bs_context_to_binary,Src}];
+cg_instr(set_tuple_element=Op, [New,Tuple,{integer,Index}], _Dst) ->
+ [{Op,New,Tuple,Index}];
+cg_instr({float,clearerror}, [], _Dst) ->
+ [fclearerror];
+cg_instr({float,get}, [Src], Dst) ->
+ [{fmove,Src,Dst}];
+cg_instr({float,put}, [Src], Dst) ->
+ [{fmove,Src,Dst}];
+cg_instr(get_hd=Op, [Src], Dst) ->
+ [{Op,Src,Dst}];
+cg_instr(get_tl=Op, [Src], Dst) ->
+ [{Op,Src,Dst}];
+cg_instr(get_tuple_element=Op, [Src,{integer,N}], Dst) ->
+ [{Op,Src,N,Dst}];
+cg_instr(put_list=Op, [Hd,Tl], Dst) ->
+ [{Op,Hd,Tl,Dst}];
+cg_instr(put_tuple_arity, [{integer,Arity}], Dst) ->
+ [{put_tuple,Arity,Dst}];
+cg_instr(put_tuple_elements, Elements, _Dst) ->
+ [{put,E} || E <- Elements];
+cg_instr(raw_raise, Args, Dst) ->
+ setup_args(Args) ++ [raw_raise|copy({x,0}, Dst)];
+cg_instr(remove_message, [], _Dst) ->
+ [remove_message];
+cg_instr(resume, [A,B], _Dst) ->
+ [{bif,raise,{f,0},[A,B],{x,0}}];
+cg_instr(timeout, [], _Dst) ->
+ [timeout].
+
+cg_test(bs_add=Op, Fail, [Src1,Src2,{integer,Unit}], Dst, _I) ->
+ [{Op,Fail,[Src1,Src2,Unit],Dst}];
+cg_test(bs_skip, Fail, Args, _Dst, I) ->
+ cg_bs_skip(Fail, Args, I);
+cg_test(bs_utf8_size=Op, Fail, [Src], Dst, _I) ->
+ [{Op,Fail,Src,Dst}];
+cg_test(bs_utf16_size=Op, Fail, [Src], Dst, _I) ->
+ [{Op,Fail,Src,Dst}];
+cg_test({float,convert}, Fail, [Src], Dst, _I) ->
+ {f,0} = Fail, %Assertion.
+ [{fconv,Src,Dst}];
+cg_test({float,Op0}, Fail, Args, Dst, #cg_set{anno=Anno}) ->
+ Op = case Op0 of
+ '+' -> fadd;
+ '-' when length(Args) =:= 2 -> fsub;
+ '-' -> fnegate;
+ '*' -> fmul;
+ '/' -> fdiv
+ end,
+ [line(Anno),{bif,Op,Fail,Args,Dst}];
+cg_test(get_map_element, Fail, [Map,Key], Dst, _I) ->
+ [{get_map_elements,Fail,Map,{list,[Key,Dst]}}];
+cg_test(peek_message, Fail, [], Dst, _I) ->
+ [{loop_rec,Fail,{x,0}}|copy({x,0}, Dst)];
+cg_test(put_map, Fail, [{atom,exact},SrcMap|Ss], Dst, Set) ->
+ Live = get_live(Set),
+ [{put_map_exact,Fail,SrcMap,Dst,Live,{list,Ss}}];
+cg_test(wait_timeout, Fail, [Timeout], _Dst, _) ->
+ case Timeout of
+ {atom,infinity} ->
+ [{wait,Fail}];
+ _ ->
+ [{wait_timeout,Fail,Timeout}]
+ end.
+
+cg_bs_get(Fail, #cg_set{dst=Dst0,args=[#b_literal{val=Type}|Ss0]}=Set, St) ->
+ Op = case Type of
+ integer -> bs_get_integer2;
+ float -> bs_get_float2;
+ binary -> bs_get_binary2;
+ utf8 -> bs_get_utf8;
+ utf16 -> bs_get_utf16;
+ utf32 -> bs_get_utf32
+ end,
+ [Dst|Ss1] = beam_args([Dst0|Ss0], St),
+ Ss = case Ss1 of
+ [Ctx,{literal,Flags},Size,{integer,Unit}] ->
+ %% Plain integer/float/binary.
+ [Ctx,Size,Unit,field_flags(Flags, Set)];
+ [Ctx,{literal,Flags}] ->
+ %% Utf8/16/32.
+ [Ctx,field_flags(Flags, Set)]
+ end,
+ Live = get_live(Set),
+ [{test,Op,Fail,Live,Ss,Dst}].
+
+cg_bs_skip(Fail, [{atom,Type}|Ss0], Set) ->
+ Op = case Type of
+ utf8 -> bs_skip_utf8;
+ utf16 -> bs_skip_utf16;
+ utf32 -> bs_skip_utf32;
+ _ -> bs_skip_bits2
+ end,
+ Live = get_live(Set),
+ Ss = case Ss0 of
+ [Ctx,{literal,Flags},Size,{integer,Unit}] ->
+ %% Plain integer/float/binary.
+ [Ctx,Size,Unit,field_flags(Flags, Set)];
+ [Ctx,{literal,Flags}] ->
+ %% Utf8/16/32.
+ [Ctx,Live,field_flags(Flags, Set)]
+ end,
+ case {Type,Ss} of
+ {binary,[_,{atom,all},1,_]} ->
+ [];
+ {binary,[R,{atom,all},U,_]} ->
+ [{test,bs_test_unit,Fail,[R,U]}];
+ {_,_} ->
+ [{test,Op,Fail,Ss}]
+ end.
+
+field_flags(Flags, #cg_set{anno=#{location:={File,Line}}}) ->
+ {field_flags,[{anno,[Line,{file,File}]}|Flags]};
+field_flags(Flags, _) ->
+ {field_flags,Flags}.
+
+cg_bs_put(Fail, [{atom,Type},{literal,Flags}|Args]) ->
+ Op = case Type of
+ integer -> bs_put_integer;
+ float -> bs_put_float;
+ binary -> bs_put_binary;
+ utf8 -> bs_put_utf8;
+ utf16 -> bs_put_utf16;
+ utf32 -> bs_put_utf32
+ end,
+ case Args of
+ [Src,Size,{integer,Unit}] ->
+ [{Op,Fail,Size,Unit,{field_flags,Flags},Src}];
+ [Src] ->
+ [{Op,Fail,{field_flags,Flags},Src}]
+ end.
+
+cg_bs_init(Dst, Size0, Unit, Live, Fail) ->
+ Op = case Unit of
+ 1 -> bs_init_bits;
+ 8 -> bs_init2
+ end,
+ Size = cg_bs_init_size(Size0),
+ [{Op,Fail,Size,0,Live,{field_flags,[]},Dst}].
+
+cg_bs_init_size({x,_}=R) -> R;
+cg_bs_init_size({y,_}=R) -> R;
+cg_bs_init_size({integer,Int}) -> Int.
+
+cg_catch(Agg, T0, Context, St0) ->
+ {Moves,T1} = cg_extract(T0, Agg, St0),
+ {T,St} = cg_block(T1, Context, St0),
+ {Moves++T,St}.
+
+cg_try(Agg, Tag, T0, Context, St0) ->
+ {Moves0,T1} = cg_extract(T0, Agg, St0),
+ Moves = order_moves(Moves0, {x,3}),
+ [#cg_set{op=kill_try_tag}|T2] = T1,
+ {T,St} = cg_block(T2, Context, St0),
+ {[{try_case,Tag}|Moves++T],St}.
+
+cg_extract([#cg_set{op=extract,dst=Dst0,args=Args0}|Is0], Agg, St) ->
+ [Dst,Agg,{integer,X}] = beam_args([Dst0|Args0], St),
+ {Ds,Is} = cg_extract(Is0, Agg, St),
+ case keymember(Dst, 3, Ds) of
+ true ->
+ %% This destination will be overwritten.
+ {Ds,Is};
+ false ->
+ {copy({x,X}, Dst)++Ds,Is}
+ end;
+cg_extract(Is, _, _) ->
+ {[],Is}.
+
+copy(Src, Src) -> [];
+copy(Src, Dst) -> [{move,Src,Dst}].
+
+force_reg({literal,_}=Lit, Reg) ->
+ {Reg,[{move,Lit,Reg}]};
+force_reg({Kind,_}=R, _) when Kind =:= x; Kind =:= y ->
+ {R,[]}.
+
+%% successors(Terminator) -> [Successor].
+%% Return an ordset of all successors for the given terminator.
+
+successors(#cg_br{succ=Succ,fail=Fail}) ->
+ ordsets:from_list([Succ,Fail]);
+successors(#cg_switch{fail=Fail,list=List}) ->
+ ordsets:from_list([Fail|[Lbl || {_,Lbl} <- List]]);
+successors(#cg_ret{}) -> [].
+
+%% linearize(Blocks) -> [{BlockLabel,#cg_blk{}}].
+%% Linearize the intermediate representation of the code. Also
+%% translate blocks from the SSA records to internal record types
+%% used only in this module.
+
+linearize(Blocks) ->
+ Linear = beam_ssa:linearize(Blocks),
+ linearize_1(Linear, Blocks).
+
+linearize_1([{?BADARG_BLOCK,_}|Ls], Blocks) ->
+ linearize_1(Ls, Blocks);
+linearize_1([{L,Block0}|Ls], Blocks) ->
+ Block = translate_block(L, Block0, Blocks),
+ [{L,Block}|linearize_1(Ls, Blocks)];
+linearize_1([], _Blocks) -> [].
+
+%% translate_block(BlockLabel, #b_blk{}, Blocks) -> #cg_blk{}.
+%% Translate a block to the internal records used in this module.
+%% Also eliminate phi nodes, replacing them with 'copy' instructions
+%% in the predecessor blocks.
+
+translate_block(L, #b_blk{anno=Anno,is=Is0,last=Last0}, Blocks) ->
+ Last = translate_terminator(Last0),
+ PhiCopies = translate_phis(L, Last, Blocks),
+ Is1 = translate_is(Is0, PhiCopies),
+ Is = case Anno of
+ #{frame_size:=Size} ->
+ Alloc = #cg_alloc{stack=Size},
+ [Alloc|Is1];
+ #{} -> Is1
+ end,
+ #cg_blk{anno=Anno,is=Is,last=Last}.
+
+translate_is([#b_set{op=phi}|Is], Tail) ->
+ translate_is(Is, Tail);
+translate_is([#b_set{anno=Anno0,op=Op,dst=Dst,args=Args}=I|Is], Tail) ->
+ Anno = case beam_ssa:clobbers_xregs(I) of
+ true -> Anno0#{clobbers=>true};
+ false -> Anno0
+ end,
+ [#cg_set{anno=Anno,op=Op,dst=Dst,args=Args}|translate_is(Is, Tail)];
+translate_is([], Tail) -> Tail.
+
+translate_terminator(#b_ret{anno=Anno,arg=Arg}) ->
+ Dealloc = case Anno of
+ #{deallocate:=N} -> N;
+ #{} -> none
+ end,
+ #cg_ret{arg=Arg,dealloc=Dealloc};
+translate_terminator(#b_br{bool=#b_literal{val=true},succ=Succ}) ->
+ #cg_br{bool=#b_literal{val=true},succ=Succ,fail=Succ};
+translate_terminator(#b_br{bool=#b_literal{val=false},fail=Fail}) ->
+ #cg_br{bool=#b_literal{val=true},succ=Fail,fail=Fail};
+translate_terminator(#b_br{bool=Bool,succ=Succ,fail=Fail}) ->
+ #cg_br{bool=Bool,succ=Succ,fail=Fail};
+translate_terminator(#b_switch{arg=Bool,fail=Fail,list=List}) ->
+ #cg_switch{arg=Bool,fail=Fail,list=List}.
+
+translate_phis(L, #cg_br{succ=Target,fail=Target}, Blocks) ->
+ #b_blk{is=Is} = maps:get(Target, Blocks),
+ Phis = takewhile(fun(#b_set{op=phi}) -> true;
+ (#b_set{}) -> false
+ end, Is),
+ phi_copies(Phis, L);
+translate_phis(_, _, _) -> [].
+
+phi_copies([#b_set{dst=Dst,args=PhiArgs}|Sets], L) ->
+ CopyArgs = [V || {V,Target} <- PhiArgs, Target =:= L],
+ [#cg_set{op=copy,dst=Dst,args=CopyArgs}|phi_copies(Sets, L)];
+phi_copies([], _) -> [].
+
+%% setup_args(Args, Anno, Context) -> [Instruction].
+%% setup_args(Args) -> [Instruction].
+%% Set up X registers for a call.
+
+setup_args(Args, Anno, none, St) ->
+ setup_args(Args) ++ kill_yregs(Anno, St);
+setup_args(Args, _, _, _) ->
+ setup_args(Args).
+
+setup_args([]) ->
+ [];
+setup_args([_|_]=Args) ->
+ Moves = gen_moves(Args, 0, []),
+ Scratch = {x,1+last(sort([length(Args)-1|[X || {x,X} <- Args]]))},
+ order_moves(Moves, Scratch).
+
+%% kill_yregs(Anno, #cg{}) -> [{kill,{y,Y}}].
+%% Kill Y registers that will not be used again.
+
+kill_yregs(#{kill_yregs:=Kill}, #cg{regs=Regs}) ->
+ ordsets:from_list([{kill,maps:get(V, Regs)} || V <- Kill]);
+kill_yregs(#{}, #cg{}) -> [].
+
+%% gen_moves(As, I, Acc)
+%% Generate the basic move instruction to move the arguments
+%% to their proper registers. The list will be sorted on
+%% destinations. (I.e. the move to {x,0} will be first --
+%% see the comment to order_moves/2.)
+
+gen_moves([A|As], I, Acc) ->
+ gen_moves(As, I+1, copy(A, {x,I}) ++ Acc);
+gen_moves([], _, Acc) ->
+ keysort(3, Acc).
+
+%% order_moves([Move], ScratchReg) -> [Move]
+%% Orders move instruction so that source registers are not
+%% destroyed before they are used. If there are cycles
+%% (such as {move,{x,0},{x,1}}, {move,{x,1},{x,1}}),
+%% the scratch register is used to break up the cycle.
+%% If possible, the first move of the input list is placed
+%% last in the result list (to make the move to {x,0} occur
+%% just before the call to allow the Beam loader to coalesce
+%% the instructions).
+
+order_moves(Ms, Scr) -> order_moves(Ms, Scr, []).
+
+order_moves([{move,_,_}=M|Ms0], ScrReg, Acc0) ->
+ {Chain,Ms} = collect_chain(Ms0, [M], ScrReg),
+ Acc = reverse(Chain, Acc0),
+ order_moves(Ms, ScrReg, Acc);
+order_moves([], _, Acc) -> Acc.
+
+collect_chain(Ms, Path, ScrReg) ->
+ collect_chain(Ms, Path, [], ScrReg).
+
+collect_chain([{move,Src,Same}=M|Ms0], [{move,Same,_}|_]=Path, Others, ScrReg) ->
+ case keymember(Src, 3, Path) of
+ false ->
+ collect_chain(reverse(Others, Ms0), [M|Path], [], ScrReg);
+ true ->
+ %% There is a cycle, which we must break up.
+ {break_up_cycle(M, Path, ScrReg),reverse(Others, Ms0)}
+ end;
+collect_chain([M|Ms], Path, Others, ScrReg) ->
+ collect_chain(Ms, Path, [M|Others], ScrReg);
+collect_chain([], Path, Others, _) ->
+ {Path,Others}.
+
+break_up_cycle({move,Src,_}=M, Path, ScrReg) ->
+ [{move,ScrReg,Src},M|break_up_cycle1(Src, Path, ScrReg)].
+
+break_up_cycle1(Dst, [{move,Src,Dst}|Path], ScrReg) ->
+ [{move,Src,ScrReg}|Path];
+break_up_cycle1(Dst, [M|Path], LastMove) ->
+ [M|break_up_cycle1(Dst, Path, LastMove)].
+
+%%%
+%%% General utility functions.
+%%%
+
+verify_beam_register({x,_}=Reg) -> Reg.
+
+is_beam_register({x,_}) -> true;
+is_beam_register(_) -> false.
+
+get_register(V, Regs) ->
+ case is_beam_register(V) of
+ true -> V;
+ false -> maps:get(V, Regs)
+ end.
+
+beam_args(As, St) ->
+ [beam_arg(A, St) || A <- As].
+
+beam_arg(#b_var{name=Name}, #cg{regs=Regs}) ->
+ maps:get(Name, Regs);
+beam_arg(#b_literal{val=Val}, _) ->
+ if
+ is_atom(Val) -> {atom,Val};
+ is_float(Val) -> {float,Val};
+ is_integer(Val) -> {integer,Val};
+ Val =:= [] -> nil;
+ true -> {literal,Val}
+ end;
+beam_arg(Reg, _) ->
+ verify_beam_register(Reg).
+
+new_block_label(L, St0) ->
+ {_Lbl,St} = label_for_block(L, St0),
+ St.
+
+def_block_label(L, #cg{labels=Labels,used_labels=Used}) ->
+ Lbl = maps:get(L, Labels),
+ case gb_sets:is_member(Lbl, Used) of
+ false -> [];
+ true -> [{label,Lbl}]
+ end.
+
+use_block_labels(Ls, St) ->
+ mapfoldl(fun use_block_label/2, St, Ls).
+
+use_block_label(L, #cg{used_labels=Used,catches=Catches}=St0) ->
+ {Lbl,St} = label_for_block(L, St0),
+ case gb_sets:is_member(L, Catches) of
+ true ->
+ {{catch_tag,{f,Lbl}},
+ St#cg{used_labels=gb_sets:add(Lbl, Used)}};
+ false ->
+ {{f,Lbl},St#cg{used_labels=gb_sets:add(Lbl, Used)}}
+ end.
+
+label_for_block(L, #cg{labels=Labels0}=St0) ->
+ case Labels0 of
+ #{L:=Lbl} ->
+ {Lbl,St0};
+ #{} ->
+ {Lbl,St} = new_label(St0),
+ Labels = Labels0#{L=>Lbl},
+ {Lbl,St#cg{labels=Labels}}
+ end.
+
+%% local_func_label(Name, Arity, State) -> {Label,State'}
+%% local_func_label({Name,Arity}, State) -> {Label,State'}
+%% Get the function entry label for a local function.
+
+local_func_label(Name, Arity, St) ->
+ local_func_label({Name,Arity}, St).
+
+local_func_label(Key, #cg{functable=Map}=St0) ->
+ case Map of
+ #{Key := Label} ->
+ {Label,St0};
+ _ ->
+ {Label,St} = new_label(St0),
+ {Label,St#cg{functable=Map#{Key => Label}}}
+ end.
+
+%% is_gc_bif(Name, Args) -> true|false.
+%% Determines whether the BIF Name/Arity might do a GC.
+
+-spec is_gc_bif(atom(), [beam_ssa:value()]) -> boolean().
+
+is_gc_bif(hd, [_]) -> false;
+is_gc_bif(tl, [_]) -> false;
+is_gc_bif(self, []) -> false;
+is_gc_bif(node, []) -> false;
+is_gc_bif(node, [_]) -> false;
+is_gc_bif(element, [_,_]) -> false;
+is_gc_bif(get, [_]) -> false;
+is_gc_bif(is_map_key, [_,_]) -> false;
+is_gc_bif(map_get, [_,_]) -> false;
+is_gc_bif(tuple_size, [_]) -> false;
+is_gc_bif(Bif, Args) ->
+ Arity = length(Args),
+ not (erl_internal:bool_op(Bif, Arity) orelse
+ erl_internal:new_type_test(Bif, Arity) orelse
+ erl_internal:comp_op(Bif, Arity)).
+
+%% new_label(St) -> {L,St}.
+
+new_label(#cg{lcount=Next}=St) ->
+ {Next,St#cg{lcount=Next+1}}.
+
+%% call_line(tail|body, Func, Anno) -> [] | [{line,...}].
+%% Produce a line instruction if it will be needed by the
+%% call to Func.
+
+call_line(_Context, {extfunc,Mod,Name,Arity}, Anno) ->
+ case erl_bifs:is_safe(Mod, Name, Arity) of
+ false ->
+ %% The call could be to a BIF.
+ %% We'll need a line instruction in case the
+ %% BIF call fails.
+ [line(Anno)];
+ true ->
+ %% Call to a safe BIF. Since it cannot fail,
+ %% we don't need any line instruction here.
+ []
+ end;
+call_line(body, _, Anno) ->
+ [line(Anno)];
+call_line(tail, local, _) ->
+ %% Tail-recursive call to a local function. A line
+ %% instruction will not be useful.
+ [];
+call_line(tail, _, Anno) ->
+ %% Call to a fun.
+ [line(Anno)].
+
+%% line(Le) -> {line,[] | {location,File,Line}}
+%% Create a line instruction, containing information about
+%% the current filename and line number. A line information
+%% instruction should be placed before any operation that could
+%% cause an exception.
+
+line(#{location:={File,Line}}) ->
+ {line,[{location,File,Line}]};
+line(#{}) ->
+ {line,[]}.
+
+flatmapfoldl(F, Accu0, [Hd|Tail]) ->
+ {R,Accu1} = F(Hd, Accu0),
+ {Rs,Accu2} = flatmapfoldl(F, Accu1, Tail),
+ {R++Rs,Accu2};
+flatmapfoldl(_, Accu, []) -> {[],Accu}.
diff --git a/lib/compiler/src/beam_ssa_lint.erl b/lib/compiler/src/beam_ssa_lint.erl
new file mode 100644
index 0000000000..a003607dab
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_lint.erl
@@ -0,0 +1,349 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose: Internal consistency checks for the beam_ssa format.
+
+-module(beam_ssa_lint).
+
+-export([module/2, format_error/1]).
+
+-import(lists, [append/1, foldl/3, foreach/2]).
+
+-include("beam_ssa.hrl").
+
+-spec module(#b_module{}, [compile:option()]) ->
+ {'ok',#b_module{}} | {'error',list()}.
+module(#b_module{body=Fs,name=Name}=Mod0, _Options) ->
+ Es0 = append([validate_function(F) || F <- Fs]),
+ case [{?MODULE,E} || E <- Es0] of
+ [] ->
+ {ok, Mod0};
+ [_|_]=Es ->
+ {error,[{atom_to_list(Name), Es}]}
+ end.
+
+-spec format_error(term()) -> iolist().
+format_error({{_M,F,A},{redefined_variable, Name, Old, I}}) ->
+ io_lib:format("~p/~p: Variable ~ts (~ts) redefined by ~ts",
+ [F, A, format_var(Name), format_instr(Old), format_instr(I)]);
+format_error({{_M,F,A},{missing_phi_paths, Paths, I}}) ->
+ io_lib:format("~p/~p: Phi node ~ts doesn't define a value for these "
+ "branches: ~w",
+ [F, A, format_instr(I), Paths]);
+format_error({{_M,F,A},{garbage_phi_paths, Paths, I}}) ->
+ io_lib:format("~p/~p: Phi node ~ts defines a value for these unreachable "
+ "or non-existent branches: ~w",
+ [F, A, format_instr(I), Paths]);
+format_error({{_M,F,A},{unknown_phi_variable, Name, {From, _To}, I}}) ->
+ io_lib:format("~p/~p: Variable ~ts used in phi node ~ts is undefined on "
+ "branch ~w",
+ [F, A, format_var(Name), format_instr(I), From]);
+format_error({{_M,F,A},{unknown_block, Label, I}}) ->
+ io_lib:format("~p/~p: Unknown block ~p referenced in ~ts",
+ [F, A, Label, I]);
+format_error({{_M,F,A},{unknown_variable, Name, I}}) ->
+ io_lib:format("~p/~p: Unbound variable ~ts used in ~ts",
+ [F, A, format_var(Name), format_instr(I)]);
+format_error({{_M,F,A},{phi_inside_block, Name, Id}}) ->
+ io_lib:format("~p/~p: Phi node defining ~ts is not at start of block ~p",
+ [F, A, format_var(Name), Id]);
+format_error({{_M,F,A},{undefined_label_in_phi, Label, I}}) ->
+ io_lib:format("~p/~p: Unknown block label ~p in phi node ~ts",
+ [F, A, Label, format_instr(I)]).
+
+format_instr(I) ->
+ [$',beam_ssa_pp:format_instr(I),$'].
+
+format_var(V) ->
+ beam_ssa_pp:format_var(#b_var{name=V}).
+
+validate_function(F) ->
+ try
+ validate_variables(F),
+ []
+ catch
+ throw:Reason ->
+ #{func_info:=MFA} = F#b_function.anno,
+ [{MFA,Reason}];
+ Class:Error:Stack ->
+ io:fwrite("Function: ~p\n", [F#b_function.anno]),
+ erlang:raise(Class, Error, Stack)
+ end.
+
+-type defined_vars() :: gb_sets:set(beam_ssa:var_name()).
+
+-record(vvars,
+ {blocks :: #{ beam_ssa:label() => beam_ssa:b_blk() },
+ branch_def_vars :: #{
+ %% Describes the variable state at the time of this exact branch (phi
+ %% node validation).
+ {From :: beam_ssa:label(), To :: beam_ssa:label()} => defined_vars(),
+ %% Describes the variable state common to all branches leading to this
+ %% label (un/redefined variable validation).
+ beam_ssa:label() => defined_vars() },
+ defined_vars :: defined_vars()}).
+
+-spec validate_variables(beam_ssa:b_function()) -> ok.
+validate_variables(#b_function{ args = Args, bs = Blocks }) ->
+ %% Prefill the mapping with function arguments.
+ ArgNames = vvars_get_varnames(Args),
+ DefVars = gb_sets:from_list(ArgNames),
+ Entry = 0,
+
+ State = #vvars{blocks = Blocks,
+ branch_def_vars = #{ Entry => DefVars },
+ defined_vars = DefVars},
+ ok = vvars_assert_unique(Blocks, ArgNames),
+ vvars_phi_nodes(vvars_block(Entry, State)).
+
+%% Checks the uniqueness of all variables across all blocks.
+-spec vvars_assert_unique(Blocks, [beam_ssa:var_name()]) -> ok when
+ Blocks :: #{ beam_ssa:label() => beam_ssa:b_blk() }.
+vvars_assert_unique(Blocks, Args) ->
+ BlockIs = [Is || #b_blk{is=Is} <- maps:values(Blocks)],
+ Defined0 = maps:from_list([{V,argument} || V <- Args]),
+ _ = foldl(fun(Is, Defined) ->
+ vvars_assert_unique_1(Is, Defined)
+ end, Defined0, BlockIs),
+ ok.
+
+-spec vvars_assert_unique_1(Is, Defined) -> ok when
+ Is :: list(beam_ssa:b_set()),
+ Defined :: #{ beam_ssa:var_name() => beam_ssa:b_set() }.
+vvars_assert_unique_1([#b_set{dst=#b_var{name=DstName}}=I|Is], Defined) ->
+ case Defined of
+ #{DstName:=Old} -> throw({redefined_variable, DstName, Old, I});
+ _ -> vvars_assert_unique_1(Is, Defined#{DstName=>I})
+ end;
+vvars_assert_unique_1([], Defined) ->
+ Defined.
+
+-spec vvars_phi_nodes(State :: #vvars{}) -> ok.
+vvars_phi_nodes(#vvars{ blocks = Blocks }=State) ->
+ _ = [vvars_phi_nodes_1(Is, Id, State) ||
+ {Id, #b_blk{ is = Is }} <- maps:to_list(Blocks)],
+ ok.
+
+-spec vvars_phi_nodes_1(Is, Id, State) -> ok when
+ Is :: list(beam_ssa:b_set()),
+ Id :: beam_ssa:label(),
+ State :: #vvars{}.
+vvars_phi_nodes_1([#b_set{ op = phi, args = Phis }=I | Is], Id, State) ->
+ ok = vvars_assert_phi_paths(Phis, I, Id, State),
+ ok = vvars_assert_phi_vars(Phis, I, Id, State),
+ vvars_phi_nodes_1(Is, Id, State);
+vvars_phi_nodes_1([_ | Is], Id, _State) ->
+ case [Dst || #b_set{op=phi,dst=#b_var{name=Dst}} <- Is] of
+ [Name|_] ->
+ throw({phi_inside_block, Name, Id});
+ [] ->
+ ok
+ end;
+vvars_phi_nodes_1([], _Id, _State) ->
+ ok.
+
+%% Checks whether all paths leading to this phi node are represented, and that
+%% it doesn't reference any non-existent paths.
+-spec vvars_assert_phi_paths(Phis, I, Id, State) -> ok when
+ Phis :: list({beam_ssa:argument(), beam_ssa:label()}),
+ Id :: beam_ssa:label(),
+ I :: beam_ssa:b_set(),
+ State :: #vvars{}.
+vvars_assert_phi_paths(Phis, I, Id, State) ->
+ BranchKeys = maps:keys(State#vvars.branch_def_vars),
+ RequiredPaths = ordsets:from_list([From || {From, To} <- BranchKeys, To =:= Id]),
+ ProvidedPaths = ordsets:from_list([From || {_Value, From} <- Phis]),
+ case ordsets:subtract(RequiredPaths, ProvidedPaths) of
+ [_|_]=MissingPaths -> throw({missing_phi_paths, MissingPaths, I});
+ [] -> ok
+ end.
+ %% %% The following test is sometimes useful to find missing optimizations.
+ %% %% It is commented out, though, because it can be triggered by
+ %% %% by weird but legal code.
+ %% case ordsets:subtract(ProvidedPaths, RequiredPaths) of
+ %% [_|_]=GarbagePaths -> throw({garbage_phi_paths, GarbagePaths, I});
+ %% [] -> ok
+ %% end.
+
+%% Checks whether all variables used in this phi node are defined in the branch
+%% they arrived on.
+-spec vvars_assert_phi_vars(Phis, I, Id, State) -> ok when
+ Phis :: list({beam_ssa:argument(), beam_ssa:label()}),
+ Id :: beam_ssa:label(),
+ I :: beam_ssa:b_set(),
+ State :: #vvars{}.
+vvars_assert_phi_vars(Phis, I, Id, #vvars{blocks=Blocks,
+ branch_def_vars=BranchDefVars}) ->
+ Vars = [{Var, From} || {#b_var{}=Var, From} <- Phis],
+ foreach(fun({#b_var{name=VarName}, From}) ->
+ BranchKey = {From, Id},
+ case BranchDefVars of
+ #{BranchKey:=DefVars} ->
+ case gb_sets:is_member(VarName, DefVars) of
+ true -> ok;
+ false -> throw({unknown_variable, VarName, I})
+ end;
+ #{} ->
+ throw({unknown_phi_variable, VarName, BranchKey, I})
+ end
+ end, Vars),
+ Labels = [From || {#b_literal{},From} <- Phis],
+ foreach(fun(Label) ->
+ case Blocks of
+ #{Label:=_} ->
+ ok;
+ #{} ->
+ throw({undefined_label_in_phi, Label, I})
+ end
+ end, Labels).
+
+-spec vvars_block(Id, State) -> #vvars{} when
+ Id :: beam_ssa:label(),
+ State :: #vvars{}.
+vvars_block(Id, State0) ->
+ #{ Id := #b_blk{ is = Is, last = Terminator} } = State0#vvars.blocks,
+ #{ Id := DefVars } = State0#vvars.branch_def_vars,
+ State = State0#vvars{ defined_vars = DefVars },
+ vvars_terminator(Terminator, Id, vvars_block_1(Is, State)).
+
+-spec vvars_block_1(Blocks, State) -> #vvars{} when
+ Blocks :: list(beam_ssa:b_blk()),
+ State :: #vvars{}.
+vvars_block_1([], State) ->
+ State;
+vvars_block_1([#b_set{ dst = #b_var{ name = DstName }, op = phi } | Is], State0) ->
+ %% We don't check phi node arguments at this point since we may not have
+ %% visited their definition yet. They'll be handled later on in
+ %% vvars_phi_nodes/1 after all blocks are processed.
+ vvars_block_1(Is, vvars_save_var(DstName, State0));
+vvars_block_1([#b_set{ dst = #b_var{ name = DstName }, args = Args }=I | Is], State0) ->
+ ok = vvars_assert_args(Args, I, State0),
+ vvars_block_1(Is, vvars_save_var(DstName, State0)).
+
+-spec vvars_terminator(Terminator, From, State) -> #vvars{} when
+ Terminator :: beam_ssa:terminator(),
+ From :: beam_ssa:label(),
+ State :: #vvars{}.
+vvars_terminator(#b_ret{ arg = Arg }=I, _From, State) ->
+ ok = vvars_assert_args([Arg], I, State),
+ State;
+vvars_terminator(#b_switch{arg=Arg,fail=Fail,list=Switch}=I, From, State) ->
+ ok = vvars_assert_args([Arg], I, State),
+ ok = vvars_assert_args([A || {A,_Lbl} <- Switch], I, State),
+ Labels = [Fail | [Lbl || {_Arg, Lbl} <- Switch]],
+ ok = vvars_assert_labels(Labels, I, State),
+ vvars_terminator_1(Labels, From, State);
+vvars_terminator(#b_br{bool=#b_literal{val=true},succ=Succ}=I, From, State) ->
+ Labels = [Succ],
+ ok = vvars_assert_labels(Labels, I, State),
+ vvars_terminator_1(Labels, From, State);
+vvars_terminator(#b_br{bool=#b_literal{val=false},fail=Fail}=I, From, State) ->
+ Labels = [Fail],
+ ok = vvars_assert_labels(Labels, I, State),
+ vvars_terminator_1(Labels, From, State);
+vvars_terminator(#b_br{ bool = Arg, succ = Succ, fail = Fail }=I, From, State) ->
+ ok = vvars_assert_args([Arg], I, State),
+ Labels = [Fail, Succ],
+ ok = vvars_assert_labels(Labels, I, State),
+ vvars_terminator_1(Labels, From, State).
+
+-spec vvars_terminator_1(Labels, From, State) -> #vvars{} when
+ Labels :: list(beam_ssa:label()),
+ From :: beam_ssa:label(),
+ State :: #vvars{}.
+vvars_terminator_1(Labels0, From, State0) ->
+ %% Filter out all branches that have already been taken. This should result
+ %% in either all of Labels0 or an empty list.
+ Labels = [To || To <- Labels0,
+ not maps:is_key({From, To}, State0#vvars.branch_def_vars)],
+ true = Labels =:= Labels0 orelse Labels =:= [], %Assertion
+ State1 = foldl(fun(To, State) ->
+ vvars_save_branch(From, To, State)
+ end, State0, Labels),
+ foldl(fun(To, State) ->
+ vvars_block(To, State)
+ end, State1, Labels).
+
+%% Gets all variable names in args, ignoring literals etc
+-spec vvars_get_varnames(Args) -> list(beam_ssa:var_name()) when
+ Args :: list(beam_ssa:argument()).
+vvars_get_varnames(Args) ->
+ [Name || #b_var{ name = Name } <- Args].
+
+%% Checks that all variables in Args are defined in all paths leading to the
+%% current State.
+-spec vvars_assert_args(Args, I, State) -> ok when
+ Args :: list(beam_ssa:argument()),
+ I :: beam_ssa:terminator() | beam_ssa:b_set(),
+ State :: #vvars{}.
+vvars_assert_args(Args, I, #vvars{defined_vars=DefVars}=State) ->
+ foreach(fun(#b_remote{mod=Mod,name=Name}) ->
+ vvars_assert_args([Mod,Name], I, State);
+ (#b_var{name=Name}) ->
+ case gb_sets:is_member(Name, DefVars) of
+ true -> ok;
+ false -> throw({unknown_variable,Name,I})
+ end;
+ (_) -> ok
+ end, Args).
+
+%% Checks that all given labels are defined in State.
+-spec vvars_assert_labels(Labels, I, State) -> ok when
+ Labels :: list(beam_ssa:label()),
+ I :: beam_ssa:terminator(),
+ State :: #vvars{}.
+vvars_assert_labels(Labels, I, #vvars{blocks=Blocks}) ->
+ foreach(fun(Label) ->
+ case maps:is_key(Label, Blocks) of
+ false -> throw({unknown_block, Label, I});
+ true -> ok
+ end
+ end, Labels).
+
+-spec vvars_save_branch(From, To, State) -> #vvars{} when
+ From :: beam_ssa:label(),
+ To :: beam_ssa:label(),
+ State :: #vvars{}.
+vvars_save_branch(From, To, State) ->
+ DefVars = State#vvars.defined_vars,
+ Branches0 = State#vvars.branch_def_vars,
+ case Branches0 of
+ #{ To := LblDefVars } ->
+ MergedVars = vvars_merge_branches(DefVars, LblDefVars),
+
+ Branches = Branches0#{ To => MergedVars, {From, To} => DefVars },
+ State#vvars { branch_def_vars = Branches };
+ _ ->
+ Branches = Branches0#{ To => DefVars, {From, To} => DefVars },
+ State#vvars { branch_def_vars = Branches }
+ end.
+
+-spec vvars_merge_branches(New, Existing) -> defined_vars() when
+ New :: defined_vars(),
+ Existing :: defined_vars().
+vvars_merge_branches(New, Existing) ->
+ gb_sets:intersection(New, Existing).
+
+-spec vvars_save_var(VarName, State) -> #vvars{} when
+ VarName :: beam_ssa:var_name(),
+ State :: #vvars{}.
+vvars_save_var(VarName, State0) ->
+ %% vvars_assert_unique guarantees that variables are never set twice.
+ DefVars = gb_sets:insert(VarName, State0#vvars.defined_vars),
+ State0#vvars{ defined_vars = DefVars }.
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
new file mode 100644
index 0000000000..da466a3316
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -0,0 +1,1307 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(beam_ssa_opt).
+-export([module/2]).
+
+-include("beam_ssa.hrl").
+-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2,
+ splitwith/2,takewhile/2,unzip/1]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+ {'ok',beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, Opts) ->
+ Ps = passes(Opts),
+ Fs = functions(Fs0, Ps),
+ {ok,Module#b_module{body=Fs}}.
+
+functions([F|Fs], Ps) ->
+ [function(F, Ps)|functions(Fs, Ps)];
+functions([], _Ps) -> [].
+
+-type b_blk() :: beam_ssa:b_blk().
+-type b_var() :: beam_ssa:b_var().
+-type label() :: beam_ssa:label().
+
+-record(st, {ssa :: beam_ssa:block_map() | [{label(),b_blk()}],
+ args :: [b_var()],
+ cnt :: label()}).
+-define(PASS(N), {N,fun N/1}).
+
+passes(Opts0) ->
+ Ps = [?PASS(ssa_opt_split_blocks),
+ ?PASS(ssa_opt_element),
+ ?PASS(ssa_opt_linearize),
+ ?PASS(ssa_opt_record),
+ ?PASS(ssa_opt_cse),
+ ?PASS(ssa_opt_type),
+ ?PASS(ssa_opt_float),
+ ?PASS(ssa_opt_live),
+ ?PASS(ssa_opt_bsm),
+ ?PASS(ssa_opt_bsm_shortcut),
+ ?PASS(ssa_opt_misc),
+ ?PASS(ssa_opt_blockify),
+ ?PASS(ssa_opt_sink),
+ ?PASS(ssa_opt_merge_blocks)],
+ Negations = [{list_to_atom("no_"++atom_to_list(N)),N} ||
+ {N,_} <- Ps],
+ Opts = proplists:substitute_negations(Negations, Opts0),
+ [case proplists:get_value(Name, Opts, true) of
+ true ->
+ P;
+ false ->
+ {NoName,Name} = keyfind(Name, 2, Negations),
+ {NoName,fun(S) -> S end}
+ end || {Name,_}=P <- Ps].
+
+function(#b_function{anno=Anno,bs=Blocks0,args=Args,cnt=Count0}=F, Ps) ->
+ try
+ St = #st{ssa=Blocks0,args=Args,cnt=Count0},
+ #st{ssa=Blocks,cnt=Count} = compile:run_sub_passes(Ps, St),
+ F#b_function{bs=Blocks,cnt=Count}
+ catch
+ Class:Error:Stack ->
+ #{func_info:={_,Name,Arity}} = Anno,
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
+
+%%%
+%%% Trivial sub passes.
+%%%
+
+ssa_opt_linearize(#st{ssa=Blocks}=St) ->
+ St#st{ssa=beam_ssa:linearize(Blocks)}.
+
+ssa_opt_type(#st{ssa=Linear,args=Args}=St) ->
+ St#st{ssa=beam_ssa_type:opt(Linear, Args)}.
+
+ssa_opt_blockify(#st{ssa=Linear}=St) ->
+ St#st{ssa=maps:from_list(Linear)}.
+
+%%%
+%%% Split blocks before certain instructions to enable more optimizations.
+%%%
+%%% Splitting before element/2 enables the optimization that swaps
+%%% element/2 instructions.
+%%%
+%%% Splitting before call and make_fun instructions gives more opportunities
+%%% for sinking get_tuple_element instructions.
+%%%
+
+ssa_opt_split_blocks(#st{ssa=Blocks0,cnt=Count0}=St) ->
+ P = fun(#b_set{op={bif,element}}) -> true;
+ (#b_set{op=call}) -> true;
+ (#b_set{op=make_fun}) -> true;
+ (_) -> false
+ end,
+ {Blocks,Count} = beam_ssa:split_blocks(P, Blocks0, Count0),
+ St#st{ssa=Blocks,cnt=Count}.
+
+%%%
+%%% Order element/2 calls.
+%%%
+%%% Order an unbroken chain of element/2 calls for the same tuple
+%%% with the same failure label so that the highest element is
+%%% retrieved first. That will allow the other element/2 calls to
+%%% be replaced with get_tuple_element/3 instructions.
+%%%
+
+ssa_opt_element(#st{ssa=Blocks}=St) ->
+ %% Collect the information about element instructions in this
+ %% function.
+ GetEls = collect_element_calls(beam_ssa:linearize(Blocks)),
+
+ %% Collect the element instructions into chains. The
+ %% element calls in each chain are ordered in reverse
+ %% execution order.
+ Chains = collect_chains(GetEls, []),
+
+ %% For each chain, swap the first element call with the
+ %% element call with the highest index.
+ St#st{ssa=swap_element_calls(Chains, Blocks)}.
+
+collect_element_calls([{L,#b_blk{is=Is0,last=Last}}|Bs]) ->
+ case {Is0,Last} of
+ {[#b_set{op={bif,element},dst=Element,
+ args=[#b_literal{val=N},#b_var{}=Tuple]},
+ #b_set{op=succeeded,dst=Bool,args=[Element]}],
+ #b_br{bool=Bool,succ=Succ,fail=Fail}} ->
+ Info = {L,Succ,{Tuple,Fail},N},
+ [Info|collect_element_calls(Bs)];
+ {_,_} ->
+ collect_element_calls(Bs)
+ end;
+collect_element_calls([]) -> [].
+
+collect_chains([{This,_,V,_}=El|Els], [{_,This,V,_}|_]=Chain) ->
+ %% Add to the previous chain.
+ collect_chains(Els, [El|Chain]);
+collect_chains([El|Els], [_,_|_]=Chain) ->
+ %% Save the previous chain and start a new chain.
+ [Chain|collect_chains(Els, [El])];
+collect_chains([El|Els], _Chain) ->
+ %% The previous chain is too short; discard it and start a new.
+ collect_chains(Els, [El]);
+collect_chains([], [_,_|_]=Chain) ->
+ %% Save the last chain.
+ [Chain];
+collect_chains([], _) -> [].
+
+swap_element_calls([[{L,_,_,N}|_]=Chain|Chains], Blocks0) ->
+ Blocks = swap_element_calls_1(Chain, {N,L}, Blocks0),
+ swap_element_calls(Chains, Blocks);
+swap_element_calls([], Blocks) -> Blocks.
+
+swap_element_calls_1([{L1,_,_,N1}], {N2,L2}, Blocks) when N2 > N1 ->
+ %% We have reached the end of the chain, and the first
+ %% element instrution to be executed. Its index is lower
+ %% than the maximum index found while traversing the chain,
+ %% so we will need to swap the instructions.
+ #{L1:=Blk1,L2:=Blk2} = Blocks,
+ [#b_set{dst=Dst1}=GetEl1,Succ1] = Blk1#b_blk.is,
+ [#b_set{dst=Dst2}=GetEl2,Succ2] = Blk2#b_blk.is,
+ Is1 = [GetEl2,Succ1#b_set{args=[Dst2]}],
+ Is2 = [GetEl1,Succ2#b_set{args=[Dst1]}],
+ Blocks#{L1:=Blk1#b_blk{is=Is1},L2:=Blk2#b_blk{is=Is2}};
+swap_element_calls_1([{L,_,_,N1}|Els], {N2,_}, Blocks) when N1 > N2 ->
+ swap_element_calls_1(Els, {N2,L}, Blocks);
+swap_element_calls_1([_|Els], Highest, Blocks) ->
+ swap_element_calls_1(Els, Highest, Blocks);
+swap_element_calls_1([], _, Blocks) ->
+ %% Nothing to do. The element call with highest index
+ %% is already the first one to be executed.
+ Blocks.
+
+%%%
+%%% Record optimization.
+%%%
+%%% Replace tuple matching with an is_tagged_tuple instruction
+%%% when applicable.
+%%%
+
+ssa_opt_record(#st{ssa=Linear}=St) ->
+ Blocks = maps:from_list(Linear),
+ St#st{ssa=record_opt(Linear, Blocks)}.
+
+record_opt([{L,#b_blk{is=Is0,last=Last}=Blk0}|Bs], Blocks) ->
+ Is = record_opt_is(Is0, Last, Blocks),
+ Blk = Blk0#b_blk{is=Is},
+ [{L,Blk}|record_opt(Bs, Blocks)];
+record_opt([], _Blocks) -> [].
+
+record_opt_is([#b_set{op={bif,is_tuple},dst=#b_var{name=Bool},
+ args=[Tuple]}=Set],
+ Last, Blocks) ->
+ case is_tagged_tuple(Tuple, Bool, Last, Blocks) of
+ {yes,Size,Tag} ->
+ Args = [Tuple,Size,Tag],
+ [Set#b_set{op=is_tagged_tuple,args=Args}];
+ no ->
+ [Set]
+ end;
+record_opt_is([I|Is], Last, Blocks) ->
+ [I|record_opt_is(Is, Last, Blocks)];
+record_opt_is([], _Last, _Blocks) -> [].
+
+is_tagged_tuple(#b_var{name=Tuple}, Bool,
+ #b_br{bool=#b_var{name=Bool},succ=Succ,fail=Fail},
+ Blocks) ->
+ SuccBlk = maps:get(Succ, Blocks),
+ is_tagged_tuple_1(SuccBlk, Tuple, Fail, Blocks);
+is_tagged_tuple(_, _, _, _) -> no.
+
+is_tagged_tuple_1(#b_blk{is=Is,last=Last}, Tuple, Fail, Blocks) ->
+ case Is of
+ [#b_set{op={bif,tuple_size},dst=#b_var{name=ArityVar},
+ args=[#b_var{name=Tuple}]},
+ #b_set{op={bif,'=:='},
+ dst=#b_var{name=Bool},
+ args=[#b_var{name=ArityVar},
+ #b_literal{val=ArityVal}=Arity]}]
+ when is_integer(ArityVal) ->
+ case Last of
+ #b_br{bool=#b_var{name=Bool},succ=Succ,fail=Fail} ->
+ SuccBlk = maps:get(Succ, Blocks),
+ case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of
+ no ->
+ no;
+ {yes,Tag} ->
+ {yes,Arity,Tag}
+ end;
+ _ ->
+ no
+ end;
+ _ ->
+ no
+ end.
+
+is_tagged_tuple_2(#b_blk{is=Is,
+ last=#b_br{bool=#b_var{name=Bool},fail=Fail}},
+ Tuple, Fail) ->
+ is_tagged_tuple_3(Is, Bool, Tuple);
+is_tagged_tuple_2(#b_blk{}, _, _) -> no.
+
+is_tagged_tuple_3([#b_set{op=get_tuple_element,
+ dst=#b_var{name=TagVar},
+ args=[#b_var{name=Tuple},#b_literal{val=0}]}|Is],
+ Bool, Tuple) ->
+ is_tagged_tuple_4(Is, Bool, TagVar);
+is_tagged_tuple_3([_|Is], Bool, Tuple) ->
+ is_tagged_tuple_3(Is, Bool, Tuple);
+is_tagged_tuple_3([], _, _) -> no.
+
+is_tagged_tuple_4([#b_set{op={bif,'=:='},dst=#b_var{name=Bool},
+ args=[#b_var{name=TagVar},
+ #b_literal{val=TagVal}=Tag]}],
+ Bool, TagVar) when is_atom(TagVal) ->
+ {yes,Tag};
+is_tagged_tuple_4([_|Is], Bool, TagVar) ->
+ is_tagged_tuple_4(Is, Bool, TagVar);
+is_tagged_tuple_4([], _, _) -> no.
+
+%%%
+%%% Common subexpression elimination (CSE).
+%%%
+%%% Eliminate repeated evaluation of identical expressions. To avoid
+%%% increasing the size of the stack frame, we don't eliminate
+%%% subexpressions across instructions that clobber the X registers.
+%%%
+
+ssa_opt_cse(#st{ssa=Linear}=St) ->
+ M = #{0=>#{}},
+ St#st{ssa=cse(Linear, #{}, M)}.
+
+cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
+ Es0 = maps:get(L, M0),
+ {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
+ Last = sub(Last0, Sub),
+ M = cse_successors(Is1, Blk, Es, M0),
+ Is = reverse(Is1),
+ [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
+cse([], _, _) -> [].
+
+cse_successors([#b_set{op=succeeded,args=[Src]},Bif|_], Blk, EsSucc, M0) ->
+ case cse_suitable(Bif) of
+ true ->
+ %% The previous instruction only has a valid value at the success branch.
+ %% We must remove the substitution for Src from the failure branch.
+ #b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk,
+ M = cse_successors_1([Succ], EsSucc, M0),
+ EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc),
+ cse_successors_1([Fail], EsFail, M);
+ false ->
+ %% There can't be any replacement for Src in EsSucc. No need for
+ %% any special handling.
+ cse_successors_1(beam_ssa:successors(Blk), EsSucc, M0)
+ end;
+cse_successors(_Is, Blk, Es, M) ->
+ cse_successors_1(beam_ssa:successors(Blk), Es, M).
+
+cse_successors_1([L|Ls], Es0, M) ->
+ case M of
+ #{L:=Es1} ->
+ Es = maps:filter(fun(Key, Value) ->
+ case Es1 of
+ #{Key:=Value} -> true;
+ #{} -> false
+ end
+ end, Es0),
+ cse_successors_1(Ls, Es0, M#{L:=Es});
+ #{} ->
+ cse_successors_1(Ls, Es0, M#{L=>Es0})
+ end;
+cse_successors_1([], _, M) -> M.
+
+cse_is([#b_set{op=succeeded,dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) ->
+ I = sub(I0, Sub0),
+ case I of
+ #b_set{args=[Src]} ->
+ cse_is(Is, Es, Sub0, [I|Acc]);
+ #b_set{} ->
+ %% The previous instruction has been eliminated. Eliminate the
+ %% 'succeeded' instruction too.
+ Sub = Sub0#{Bool=>#b_literal{val=true}},
+ cse_is(Is, Es, Sub, Acc)
+ end;
+cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
+ I = sub(I0, Sub0),
+ case beam_ssa:clobbers_xregs(I) of
+ true ->
+ %% Retaining the expressions map across calls and other
+ %% clobbering instructions would work, but it would cause
+ %% the common subexpressions to be saved to Y registers,
+ %% which would probably increase the size of the stack
+ %% frame.
+ cse_is(Is, #{}, Sub0, [I|Acc]);
+ false ->
+ case cse_expr(I) of
+ none ->
+ %% Not suitable for CSE.
+ cse_is(Is, Es0, Sub0, [I|Acc]);
+ {ok,ExprKey} ->
+ case Es0 of
+ #{ExprKey:=Src} ->
+ Sub = Sub0#{Dst=>Src},
+ cse_is(Is, Es0, Sub, Acc);
+ #{} ->
+ Es = Es0#{ExprKey=>Dst},
+ cse_is(Is, Es, Sub0, [I|Acc])
+ end
+ end
+ end;
+cse_is([], Es, Sub, Acc) ->
+ {Acc,Es,Sub}.
+
+cse_expr(#b_set{op=Op,args=Args}=I) ->
+ case cse_suitable(I) of
+ true -> {ok,{Op,Args}};
+ false -> none
+ end.
+
+cse_suitable(#b_set{op=get_hd}) -> true;
+cse_suitable(#b_set{op=get_tl}) -> true;
+cse_suitable(#b_set{op=put_list}) -> true;
+cse_suitable(#b_set{op=put_tuple}) -> true;
+cse_suitable(#b_set{op={bif,Name},args=Args}) ->
+ %% Doing CSE for comparison operators would prevent
+ %% creation of 'test' instructions.
+ Arity = length(Args),
+ not (erl_internal:new_type_test(Name, Arity) orelse
+ erl_internal:comp_op(Name, Arity) orelse
+ erl_internal:bool_op(Name, Arity));
+cse_suitable(#b_set{}) -> false.
+
+%%%
+%%% Using floating point instructions.
+%%%
+%%% Use the special floating points version of arithmetic
+%%% instructions, if the operands are known to be floats or the result
+%%% of the operation will be a float.
+%%%
+%%% The float instructions were never used in guards before, so we
+%%% will take special care to keep not using them in guards. Using
+%%% them in guards would require a new version of the 'fconv'
+%%% instruction that would take a failure label. Since it is unlikely
+%%% that using float instructions in guards would be benefical, why
+%%% bother implementing a new instruction? Also, implementing float
+%%% instructions in guards in HiPE could turn out to be a lot of work.
+%%%
+
+-record(fs,
+ {s=undefined :: 'undefined' | 'cleared',
+ regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()},
+ fail=none :: 'none' | beam_ssa:label(),
+ ren=#{} :: #{beam_ssa:label():=beam_ssa:label()},
+ non_guards :: gb_sets:set(beam_ssa:label())
+ }).
+
+ssa_opt_float(#st{ssa=Linear0,cnt=Count0}=St) ->
+ NonGuards0 = float_non_guards(Linear0),
+ NonGuards = gb_sets:from_list(NonGuards0),
+ Fs = #fs{non_guards=NonGuards},
+ {Linear,Count} = float_opt(Linear0, Count0, Fs),
+ St#st{ssa=Linear,cnt=Count}.
+
+float_non_guards([{L,#b_blk{is=Is}}|Bs]) ->
+ case Is of
+ [#b_set{op=landingpad}|_] ->
+ [L|float_non_guards(Bs)];
+ _ ->
+ float_non_guards(Bs)
+ end;
+float_non_guards([]) -> [?BADARG_BLOCK].
+
+float_opt([{L,Blk0}|Bs], Count, Fs) ->
+ Blk = float_rename_phis(Blk0, Fs),
+ case float_need_flush(Blk, Fs) of
+ true ->
+ float_flush(L, Blk, Bs, Count, Fs);
+ false ->
+ float_opt_1(L, Blk, Bs, Count, Fs)
+ end;
+float_opt([], Count, _Fs) ->
+ {[],Count}.
+
+float_opt_1(L, #b_blk{last=#b_br{fail=F}}=Blk, Bs0,
+ Count0, #fs{non_guards=NonGuards}=Fs) ->
+ case gb_sets:is_member(F, NonGuards) of
+ true ->
+ %% This block is not inside a guard.
+ %% We can do the optimization.
+ float_opt_2(L, Blk, Bs0, Count0, Fs);
+ false ->
+ %% This block is inside a guard. Don't do
+ %% any floating point optimizations.
+ {Bs,Count} = float_opt(Bs0, Count0, Fs),
+ {[{L,Blk}|Bs],Count}
+ end;
+float_opt_1(L, Blk, Bs, Count, Fs) ->
+ float_opt_2(L, Blk, Bs, Count, Fs).
+
+float_opt_2(L, #b_blk{is=Is0}=Blk0, Bs0, Count0, Fs0) ->
+ case float_opt_is(Is0, Fs0, Count0, []) of
+ {Is1,Fs1,Count1} ->
+ Fs = float_fail_label(Blk0, Fs1),
+ Split = float_split_conv(Is1, Blk0),
+ {Blks0,Count2} = float_number(Split, L, Count1),
+ {Blks,Count3} = float_conv(Blks0, Fs#fs.fail, Count2),
+ {Bs,Count} = float_opt(Bs0, Count3, Fs),
+ {Blks++Bs,Count};
+ none ->
+ {Bs,Count} = float_opt(Bs0, Count0, Fs0),
+ {[{L,Blk0}|Bs],Count}
+ end.
+
+%% Split {float,convert} instructions into individual blocks.
+float_split_conv(Is0, Blk) ->
+ Br = #b_br{bool=#b_literal{val=true},succ=0,fail=0},
+ case splitwith(fun(#b_set{op=Op}) ->
+ Op =/= {float,convert}
+ end, Is0) of
+ {Is,[]} ->
+ [Blk#b_blk{is=Is}];
+ {[_|_]=Is1,[#b_set{op={float,convert}}=Conv|Is2]} ->
+ [#b_blk{is=Is1,last=Br},
+ #b_blk{is=[Conv],last=Br}|float_split_conv(Is2, Blk)];
+ {[],[#b_set{op={float,convert}}=Conv|Is1]} ->
+ [#b_blk{is=[Conv],last=Br}|float_split_conv(Is1, Blk)]
+ end.
+
+%% Number the blocks that were split.
+float_number([B|Bs0], FirstL, Count0) ->
+ {Bs,Count} = float_number(Bs0, Count0),
+ {[{FirstL,B}|Bs],Count}.
+
+float_number([B|Bs0], Count0) ->
+ {Bs,Count} = float_number(Bs0, Count0+1),
+ {[{Count0,B}|Bs],Count};
+float_number([], Count) ->
+ {[],Count}.
+
+%% Insert 'succeeded' instructions after each {float,convert}
+%% instruction.
+float_conv([{L,#b_blk{is=Is0}=Blk0}|Bs0], Fail, Count0) ->
+ case Is0 of
+ [#b_set{op={float,convert}}=Conv] ->
+ {Bool0,Count1} = new_reg('@ssa_bool', Count0),
+ Bool = #b_var{name=Bool0},
+ Succeeded = #b_set{op=succeeded,dst=Bool,
+ args=[Conv#b_set.dst]},
+ Is = [Conv,Succeeded],
+ [{NextL,_}|_] = Bs0,
+ Br = #b_br{bool=Bool,succ=NextL,fail=Fail},
+ Blk = Blk0#b_blk{is=Is,last=Br},
+ {Bs,Count} = float_conv(Bs0, Fail, Count1),
+ {[{L,Blk}|Bs],Count};
+ [_|_] ->
+ case Bs0 of
+ [{NextL,_}|_] ->
+ Br = #b_br{bool=#b_literal{val=true},
+ succ=NextL,fail=NextL},
+ Blk = Blk0#b_blk{last=Br},
+ {Bs,Count} = float_conv(Bs0, Fail, Count0),
+ {[{L,Blk}|Bs],Count};
+ [] ->
+ {[{L,Blk0}],Count0}
+ end
+ end.
+
+float_need_flush(#b_blk{is=Is}, #fs{s=cleared}) ->
+ case Is of
+ [#b_set{anno=#{float_op:=_}}|_] ->
+ false;
+ _ ->
+ true
+ end;
+float_need_flush(_, _) -> false.
+
+float_opt_is([#b_set{op=succeeded,args=[Src]}=I0],
+ #fs{regs=Rs}=Fs, Count, Acc) ->
+ case Rs of
+ #{Src:=Fr} ->
+ I = I0#b_set{args=[Fr]},
+ {reverse(Acc, [I]),Fs,Count};
+ #{} ->
+ {reverse(Acc, [I0]),Fs,Count}
+ end;
+float_opt_is([#b_set{anno=Anno0}=I0|Is0], Fs0, Count0, Acc) ->
+ case Anno0 of
+ #{float_op:=FTypes} ->
+ Anno = maps:remove(float_op, Anno0),
+ I1 = I0#b_set{anno=Anno},
+ {Is,Fs,Count} = float_make_op(I1, FTypes, Fs0, Count0),
+ float_opt_is(Is0, Fs, Count, reverse(Is, Acc));
+ #{} ->
+ float_opt_is(Is0, Fs0#fs{regs=#{}}, Count0, [I0|Acc])
+ end;
+float_opt_is([], Fs, _Count, _Acc) ->
+ #fs{s=undefined} = Fs, %Assertion.
+ none.
+
+float_rename_phis(#b_blk{is=Is}=Blk, #fs{ren=Ren}) ->
+ if
+ map_size(Ren) =:= 0 ->
+ Blk;
+ true ->
+ Blk#b_blk{is=float_rename_phis_1(Is, Ren)}
+ end.
+
+float_rename_phis_1([#b_set{op=phi,args=Args0}=I|Is], Ren) ->
+ Args = [float_phi_arg(Arg, Ren) || Arg <- Args0],
+ [I#b_set{args=Args}|float_rename_phis_1(Is, Ren)];
+float_rename_phis_1(Is, _) -> Is.
+
+float_phi_arg({Var,OldLbl}, Ren) ->
+ case Ren of
+ #{OldLbl:=NewLbl} ->
+ {Var,NewLbl};
+ #{} ->
+ {Var,OldLbl}
+ end.
+
+float_make_op(#b_set{op={bif,Op},dst=Dst,args=As0}=I0,
+ Ts, #fs{s=S,regs=Rs0}=Fs, Count0) ->
+ {As1,Rs1,Count1} = float_load(As0, Ts, Rs0, Count0, []),
+ {As,Is0} = unzip(As1),
+ {Fr,Count2} = new_reg('@fr', Count1),
+ FrDst = #b_var{name=Fr},
+ I = I0#b_set{op={float,Op},dst=FrDst,args=As},
+ Rs = Rs1#{Dst=>FrDst},
+ Is = append(Is0) ++ [I],
+ case S of
+ undefined ->
+ {Ignore,Count} = new_reg('@ssa_ignore', Count2),
+ C = #b_set{op={float,clearerror},dst=#b_var{name=Ignore}},
+ {[C|Is],Fs#fs{s=cleared,regs=Rs},Count};
+ cleared ->
+ {Is,Fs#fs{regs=Rs},Count2}
+ end.
+
+float_load([A|As], [T|Ts], Rs0, Count0, Acc) ->
+ {Load,Rs,Count} = float_reg_arg(A, T, Rs0, Count0),
+ float_load(As, Ts, Rs, Count, [Load|Acc]);
+float_load([], [], Rs, Count, Acc) ->
+ {reverse(Acc),Rs,Count}.
+
+float_reg_arg(A, T, Rs, Count0) ->
+ case Rs of
+ #{A:=Fr} ->
+ {{Fr,[]},Rs,Count0};
+ #{} ->
+ {Fr,Count} = new_float_copy_reg(Count0),
+ Dst = #b_var{name=Fr},
+ I = float_load_reg(T, A, Dst),
+ {{Dst,[I]},Rs#{A=>Dst},Count}
+ end.
+
+float_load_reg(convert, #b_var{}=Src, Dst) ->
+ #b_set{op={float,convert},dst=Dst,args=[Src]};
+float_load_reg(convert, #b_literal{val=Val}=Src, Dst) ->
+ try float(Val) of
+ F ->
+ #b_set{op={float,put},dst=Dst,args=[#b_literal{val=F}]}
+ catch
+ error:_ ->
+ %% Let the exception happen at runtime.
+ #b_set{op={float,convert},dst=Dst,args=[Src]}
+ end;
+float_load_reg(float, Src, Dst) ->
+ #b_set{op={float,put},dst=Dst,args=[Src]}.
+
+new_float_copy_reg(Count) ->
+ new_reg('@fr_copy', Count).
+
+new_reg(Base, Count) ->
+ Fr = {Base,Count},
+ {Fr,Count+1}.
+
+float_flush(L, Blk, Bs0, Count0, #fs{s=cleared,fail=Fail,ren=Ren0}=Fs0) ->
+ {Bool0,Count1} = new_reg('@ssa_bool', Count0),
+ Bool = #b_var{name=Bool0},
+
+ %% Insert two blocks before the current block. First allocate
+ %% block numbers.
+ FirstL = L, %For checkerror.
+ MiddleL = Count1, %For flushed float regs.
+ LastL = Count1 + 1, %For original block.
+ Count2 = Count1 + 2,
+
+ %% Build the block with the checkerror instruction.
+ CheckIs = [#b_set{op={float,checkerror},dst=Bool}],
+ FirstBlk = #b_blk{is=CheckIs,last=#b_br{bool=Bool,succ=MiddleL,fail=Fail}},
+
+ %% Build the block that flushes all registers. Note that this must be a
+ %% separate block in case the original block begins with a phi instruction,
+ %% to avoid embedding a phi instruction in the middle of a block.
+ FlushIs = float_flush_regs(Fs0),
+ MiddleBlk = #b_blk{is=FlushIs,last=#b_br{bool=#b_literal{val=true},
+ succ=LastL,fail=LastL}},
+
+ %% The last block is the original unmodified block.
+ LastBlk = Blk,
+
+ %% Update state and blocks.
+ Ren = Ren0#{L=>LastL},
+ Fs = Fs0#fs{s=undefined,regs=#{},fail=none,ren=Ren},
+ Bs1 = [{FirstL,FirstBlk},{MiddleL,MiddleBlk},{LastL,LastBlk}|Bs0],
+ float_opt(Bs1, Count2, Fs).
+
+float_fail_label(#b_blk{last=Last}, Fs) ->
+ case Last of
+ #b_br{bool=#b_var{},fail=Fail} ->
+ Fs#fs{fail=Fail};
+ _ ->
+ Fs
+ end.
+
+float_flush_regs(#fs{regs=Rs}) ->
+ maps:fold(fun(_, #b_var{name={'@fr_copy',_}}, Acc) ->
+ Acc;
+ (Dst, Fr, Acc) ->
+ [#b_set{op={float,get},dst=Dst,args=[Fr]}|Acc]
+ end, [], Rs).
+
+%%%
+%%% Live optimization.
+%%%
+%%% Optimize instructions whose values are not used. They could be
+%%% removed if they have no side effects, or in a few cases replaced
+%%% with a cheaper instructions
+%%%
+
+ssa_opt_live(#st{ssa=Linear}=St) ->
+ St#st{ssa=live_opt(reverse(Linear), #{}, [])}.
+
+live_opt([{L,Blk0}|Bs], LiveMap0, Acc) ->
+ Successors = beam_ssa:successors(Blk0),
+ Live0 = live_opt_succ(Successors, L, LiveMap0),
+ {Blk,Live} = live_opt_blk(Blk0, Live0),
+ LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
+ live_opt(Bs, LiveMap, [{L,Blk}|Acc]);
+live_opt([], _, Acc) -> Acc.
+
+live_opt_succ([S|Ss], L, LiveMap) ->
+ Live0 = live_opt_succ(Ss, L, LiveMap),
+ Key = {S,L},
+ case LiveMap of
+ #{Key:=Live} ->
+ gb_sets:union(Live, Live0);
+ #{S:=Live} ->
+ gb_sets:union(Live, Live0);
+ #{} ->
+ Live0
+ end;
+live_opt_succ([], _, _) ->
+ gb_sets:empty().
+
+live_opt_phis(Is, L, Live0, LiveMap0) ->
+ LiveMap = LiveMap0#{L=>Live0},
+ Phis = takewhile(fun(#b_set{op=Op}) -> Op =:= phi end, Is),
+ case Phis of
+ [] ->
+ LiveMap;
+ [_|_] ->
+ PhiArgs = append([Args || #b_set{args=Args} <- Phis]),
+ PhiVars = [{P,V} || {#b_var{name=V},P} <- PhiArgs],
+ PhiLive0 = rel2fam(PhiVars),
+ PhiLive = [{{L,P},gb_sets:union(gb_sets:from_list(Vs), Live0)} ||
+ {P,Vs} <- PhiLive0],
+ maps:merge(LiveMap, maps:from_list(PhiLive))
+ end.
+
+live_opt_blk(#b_blk{is=Is0,last=Last}=Blk, Live0) ->
+ Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(Last))),
+ {Is,Live} = live_opt_is(reverse(Is0), Live1, []),
+ {Blk#b_blk{is=Is},Live}.
+
+live_opt_is([#b_set{op=phi,dst=#b_var{name=Dst}}=I|Is], Live, Acc) ->
+ case gb_sets:is_member(Dst, Live) of
+ true ->
+ live_opt_is(Is, Live, [I|Acc]);
+ false ->
+ live_opt_is(Is, Live, Acc)
+ end;
+live_opt_is([#b_set{op=succeeded,dst=#b_var{name=SuccDst}=SuccDstVar,
+ args=[#b_var{name=Dst}]}=SuccI,
+ #b_set{dst=#b_var{name=Dst}}=I|Is], Live0, Acc) ->
+ case gb_sets:is_member(Dst, Live0) of
+ true ->
+ case gb_sets:is_member(SuccDst, Live0) of
+ true ->
+ Live1 = gb_sets:add(Dst, Live0),
+ Live = gb_sets:delete_any(SuccDst, Live1),
+ live_opt_is([I|Is], Live, [SuccI|Acc]);
+ false ->
+ live_opt_is([I|Is], Live0, Acc)
+ end;
+ false ->
+ case live_opt_unused(I) of
+ {replace,NewI0} ->
+ NewI = NewI0#b_set{dst=SuccDstVar},
+ live_opt_is([NewI|Is], Live0, Acc);
+ keep ->
+ case gb_sets:is_member(SuccDst, Live0) of
+ true ->
+ Live1 = gb_sets:add(Dst, Live0),
+ Live = gb_sets:delete_any(SuccDst, Live1),
+ live_opt_is([I|Is], Live, [SuccI|Acc]);
+ false ->
+ live_opt_is([I|Is], Live0, Acc)
+ end
+ end
+ end;
+live_opt_is([#b_set{op=Op,dst=#b_var{name=Dst}}=I|Is], Live0, Acc) ->
+ case gb_sets:is_member(Dst, Live0) of
+ true ->
+ Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
+ Live = gb_sets:delete_any(Dst, Live1),
+ live_opt_is(Is, Live, [I|Acc]);
+ false ->
+ case is_pure(Op) of
+ true ->
+ live_opt_is(Is, Live0, Acc);
+ false ->
+ Live = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
+ live_opt_is(Is, Live, [I|Acc])
+ end
+ end;
+live_opt_is([], Live, Acc) ->
+ {Acc,Live}.
+
+is_pure({bif,_}) -> true;
+is_pure({float,get}) -> true;
+is_pure(bs_extract) -> true;
+is_pure(extract) -> true;
+is_pure(get_hd) -> true;
+is_pure(get_tl) -> true;
+is_pure(get_tuple_element) -> true;
+is_pure(is_nonempty_list) -> true;
+is_pure(is_tagged_tuple) -> true;
+is_pure(put_list) -> true;
+is_pure(put_tuple) -> true;
+is_pure(_) -> false.
+
+live_opt_unused(#b_set{op=get_map_element}=Set) ->
+ {replace,Set#b_set{op=has_map_field}};
+live_opt_unused(_) -> keep.
+
+%%%
+%%% Optimize binary matching instructions.
+%%%
+
+ssa_opt_bsm(#st{ssa=Linear}=St) ->
+ Extracted0 = bsm_extracted(Linear),
+ Extracted = cerl_sets:from_list(Extracted0),
+ St#st{ssa=bsm_skip(Linear, Extracted)}.
+
+bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs], Extracted) ->
+ Is = bsm_skip_is(Is0, Extracted),
+ [{L,Blk#b_blk{is=Is}}|bsm_skip(Bs, Extracted)];
+bsm_skip([], _) -> [].
+
+bsm_skip_is([I0|Is], Extracted) ->
+ case I0 of
+ #b_set{op=bs_match,args=[#b_literal{val=string}|_]} ->
+ [I0|bsm_skip_is(Is, Extracted)];
+ #b_set{op=bs_match,dst=Ctx,args=[Type,PrevCtx|Args0]} ->
+ I = case cerl_sets:is_element(Ctx, Extracted) of
+ true ->
+ I0;
+ false ->
+ %% The value is never extracted.
+ Args = [#b_literal{val=skip},PrevCtx,Type|Args0],
+ I0#b_set{args=Args}
+ end,
+ [I|Is];
+ #b_set{} ->
+ [I0|bsm_skip_is(Is, Extracted)]
+ end;
+bsm_skip_is([], _) -> [].
+
+bsm_extracted([{_,#b_blk{is=Is}}|Bs]) ->
+ case Is of
+ [#b_set{op=bs_extract,args=[Ctx]}|_] ->
+ [Ctx|bsm_extracted(Bs)];
+ _ ->
+ bsm_extracted(Bs)
+ end;
+bsm_extracted([]) -> [].
+
+%%%
+%%% Short-cutting binary matching instructions.
+%%%
+
+ssa_opt_bsm_shortcut(#st{ssa=Linear}=St) ->
+ Positions = bsm_positions(Linear, #{}),
+ case map_size(Positions) of
+ 0 ->
+ %% No binary matching instructions.
+ St;
+ _ ->
+ St#st{ssa=bsm_shortcut(Linear, Positions)}
+ end.
+
+bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) ->
+ PosMap = bsm_positions_is(Is, PosMap0),
+ case {Is,Last} of
+ {[#b_set{op=bs_test_tail,dst=Bool,args=[Ctx,#b_literal{val=Bits0}]}],
+ #b_br{bool=Bool,fail=Fail}} ->
+ Bits = Bits0 + maps:get(Ctx, PosMap0),
+ bsm_positions(Bs, PosMap#{L=>{Bits,Fail}});
+ {_,_} ->
+ bsm_positions(Bs, PosMap)
+ end;
+bsm_positions([], PosMap) -> PosMap.
+
+bsm_positions_is([#b_set{op=bs_start_match,dst=New}|Is], PosMap0) ->
+ PosMap = PosMap0#{New=>0},
+ bsm_positions_is(Is, PosMap);
+bsm_positions_is([#b_set{op=bs_match,dst=New,args=Args}|Is], PosMap0) ->
+ [_,Old|_] = Args,
+ #{Old:=Bits0} = PosMap0,
+ Bits = bsm_update_bits(Args, Bits0),
+ PosMap = PosMap0#{New=>Bits},
+ bsm_positions_is(Is, PosMap);
+bsm_positions_is([_|Is], PosMap) ->
+ bsm_positions_is(Is, PosMap);
+bsm_positions_is([], PosMap) -> PosMap.
+
+bsm_update_bits([#b_literal{val=string},_,#b_literal{val=String}], Bits) ->
+ Bits + bit_size(String);
+bsm_update_bits([#b_literal{val=utf8}|_], Bits) ->
+ Bits + 8;
+bsm_update_bits([#b_literal{val=utf16}|_], Bits) ->
+ Bits + 16;
+bsm_update_bits([#b_literal{val=utf32}|_], Bits) ->
+ Bits + 32;
+bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits)
+ when is_integer(Sz) ->
+ Bits + Sz*U;
+bsm_update_bits(_, Bits) -> Bits.
+
+bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) ->
+ case {Is,Last0} of
+ {[#b_set{op=bs_match,dst=New,args=[_,Old|_]},
+ #b_set{op=succeeded,dst=Bool,args=[New]}],
+ #b_br{bool=Bool,fail=Fail}} ->
+ case PosMap of
+ #{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits ->
+ Last = Last0#b_br{fail=NextFail},
+ [{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)];
+ #{} ->
+ [{L,Blk}|bsm_shortcut(Bs, PosMap)]
+ end;
+ {_,_} ->
+ [{L,Blk}|bsm_shortcut(Bs, PosMap)]
+ end;
+bsm_shortcut([], _PosMap) -> [].
+
+%%%
+%%% Miscellanous optimizations in execution order.
+%%%
+
+ssa_opt_misc(#st{ssa=Linear}=St) ->
+ St#st{ssa=misc_opt(Linear, #{})}.
+
+misc_opt([{L,#b_blk{is=Is0,last=Last0}=Blk0}|Bs], Sub0) ->
+ {Is,Sub} = misc_opt_is(Is0, Sub0, []),
+ Last = sub(Last0, Sub),
+ Blk = Blk0#b_blk{is=Is,last=Last},
+ [{L,Blk}|misc_opt(Bs, Sub)];
+misc_opt([], _) -> [].
+
+misc_opt_is([#b_set{op=phi}=I0|Is], Sub0, Acc) ->
+ #b_set{dst=Dst,args=Args} = I = sub(I0, Sub0),
+ case all_same(Args) of
+ true ->
+ %% Eliminate the phi node if there is just one source
+ %% value or if the values are identical.
+ [{Val,_}|_] = Args,
+ Sub = Sub0#{Dst=>Val},
+ misc_opt_is(Is, Sub, Acc);
+ false ->
+ misc_opt_is(Is, Sub0, [I|Acc])
+ end;
+misc_opt_is([#b_set{}=I0|Is], Sub, Acc) ->
+ #b_set{op=Op,dst=Dst,args=Args} = I = sub(I0, Sub),
+ case make_literal(Op, Args) of
+ #b_literal{}=Literal ->
+ misc_opt_is(Is, Sub#{Dst=>Literal}, Acc);
+ error ->
+ misc_opt_is(Is, Sub, [I|Acc])
+ end;
+misc_opt_is([], Sub, Acc) ->
+ {reverse(Acc),Sub}.
+
+all_same([{H,_}|T]) ->
+ all(fun({E,_}) -> E =:= H end, T).
+
+make_literal(put_tuple, Args) ->
+ case make_literal_list(Args, []) of
+ error ->
+ error;
+ List ->
+ #b_literal{val=list_to_tuple(List)}
+ end;
+make_literal(put_list, [#b_literal{val=H},#b_literal{val=T}]) ->
+ #b_literal{val=[H|T]};
+make_literal(_, _) -> error.
+
+make_literal_list([#b_literal{val=H}|T], Acc) ->
+ make_literal_list(T, [H|Acc]);
+make_literal_list([_|_], _) ->
+ error;
+make_literal_list([], Acc) ->
+ reverse(Acc).
+
+%%%
+%%% Merge blocks.
+%%%
+
+ssa_opt_merge_blocks(#st{ssa=Blocks}=St) ->
+ Preds = beam_ssa:predecessors(Blocks),
+ St#st{ssa=merge_blocks_1(beam_ssa:rpo(Blocks), Preds, Blocks)}.
+
+merge_blocks_1([L|Ls], Preds0, Blocks0) ->
+ case Preds0 of
+ #{L:=[P]} ->
+ #{P:=Blk0,L:=Blk1} = Blocks0,
+ case is_merge_allowed(L, Blk0, Blk1) of
+ true ->
+ #b_blk{is=Is0} = Blk0,
+ #b_blk{is=Is1} = Blk1,
+ Is = Is0 ++ Is1,
+ Blk = Blk1#b_blk{is=Is},
+ Blocks1 = maps:remove(L, Blocks0),
+ Blocks2 = maps:put(P, Blk, Blocks1),
+ Successors = beam_ssa:successors(Blk),
+ Blocks = beam_ssa:update_phi_labels(Successors, L, P, Blocks2),
+ Preds = merge_update_preds(Successors, L, P, Preds0),
+ merge_blocks_1(Ls, Preds, Blocks);
+ false ->
+ merge_blocks_1(Ls, Preds0, Blocks0)
+ end;
+ #{} ->
+ merge_blocks_1(Ls, Preds0, Blocks0)
+ end;
+merge_blocks_1([], _Preds, Blocks) -> Blocks.
+
+merge_update_preds([L|Ls], From, To, Preds0) ->
+ Ps = [rename_label(P, From, To) || P <- maps:get(L, Preds0)],
+ Preds = maps:put(L, Ps, Preds0),
+ merge_update_preds(Ls, From, To, Preds);
+merge_update_preds([], _, _, Preds) -> Preds.
+
+rename_label(From, From, To) -> To;
+rename_label(Lbl, _, _) -> Lbl.
+
+is_merge_allowed(_, _, #b_blk{is=[#b_set{op=peek_message}|_]}) ->
+ false;
+is_merge_allowed(L, Blk0, #b_blk{}) ->
+ case beam_ssa:successors(Blk0) of
+ [L] -> true;
+ [_|_] -> false
+ end.
+
+%%%
+%%% When a tuple is matched, the pattern matching compiler generates a
+%%% get_tuple_element instruction for every tuple element that will
+%%% ever be used in the rest of the function. That often forces the
+%%% extracted tuple elements to be stored in Y registers until it's
+%%% time to use them. It could also mean that there could be execution
+%%% paths that will never use the extracted elements.
+%%%
+%%% This optimization will sink get_tuple_element instructions, that
+%%% is, move them forward in the execution stream to the last possible
+%%% block there they will still dominate all uses. That may reduce the
+%%% size of stack frames, reduce register shuffling, and avoid
+%%% extracting tuple elements on execution paths that never use the
+%%% extracted values.
+%%%
+
+ssa_opt_sink(#st{ssa=Blocks0}=St) ->
+ Linear = beam_ssa:linearize(Blocks0),
+
+ %% Create a map with all variables that define get_tuple_element
+ %% instructions. The variable name map to the block it is defined in.
+ Defs = maps:from_list(def_blocks(Linear)),
+
+ %% Now find all the blocks that use variables defined by get_tuple_element
+ %% instructions.
+ Used = used_blocks(Linear, Defs, []),
+
+ %% Calculate dominators.
+ Dom0 = beam_ssa:dominators(Blocks0),
+
+ %% It is not safe to move get_tuple_element instructions to blocks
+ %% that begin with certain instructions. It is also unsafe to move
+ %% the instructions into any part of a receive. To avoid such
+ %% unsafe moves, pretend that the unsuitable blocks are not
+ %% dominators.
+ Unsuitable = unsuitable(Linear, Blocks0),
+ Dom = case gb_sets:is_empty(Unsuitable) of
+ true ->
+ Dom0;
+ false ->
+ F = fun(_, DomBy) ->
+ [L || L <- DomBy,
+ not gb_sets:is_element(L, Unsuitable)]
+ end,
+ maps:map(F, Dom0)
+ end,
+
+ %% Calculate new positions for get_tuple_element instructions. The new
+ %% position is a block that dominates all uses of the variable.
+ DefLoc = new_def_locations(Used, Defs, Dom),
+
+ %% Now move all suitable get_tuple_element instructions to their
+ %% new blocks.
+ Blocks = foldl(fun({V,To}, A) ->
+ From = maps:get(V, Defs),
+ move_defs(V, From, To, A)
+ end, Blocks0, DefLoc),
+ St#st{ssa=Blocks}.
+
+def_blocks([{L,#b_blk{is=Is}}|Bs]) ->
+ def_blocks_is(Is, L, def_blocks(Bs));
+def_blocks([]) -> [].
+
+def_blocks_is([#b_set{op=get_tuple_element,dst=#b_var{name=Dst}}|Is], L, Acc) ->
+ def_blocks_is(Is, L, [{Dst,L}|Acc]);
+def_blocks_is([_|Is], L, Acc) ->
+ def_blocks_is(Is, L, Acc);
+def_blocks_is([], _, Acc) -> Acc.
+
+used_blocks([{L,Blk}|Bs], Def, Acc0) ->
+ Used = beam_ssa:used(Blk),
+ Acc = [{V,L} || V <- Used, maps:is_key(V, Def)] ++ Acc0,
+ used_blocks(Bs, Def, Acc);
+used_blocks([], _Def, Acc) ->
+ rel2fam(Acc).
+
+%% unsuitable(Linear, Blocks) -> Unsuitable.
+%% Return an ordset of block labels for the blocks that are not
+%% suitable for sinking of get_tuple_element instructions.
+
+unsuitable(Linear, Blocks) ->
+ Predecessors = beam_ssa:predecessors(Blocks),
+ Unsuitable0 = unsuitable_1(Linear),
+ Unsuitable1 = unsuitable_recv(Linear, Blocks, Predecessors),
+ gb_sets:from_list(Unsuitable0 ++ Unsuitable1).
+
+unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs]) ->
+ Unsuitable = case Op of
+ bs_extract -> true;
+ bs_put -> true;
+ {float,_} -> true;
+ landingpad -> true;
+ peek_message -> true;
+ wait_timeout -> true;
+ _ -> false
+ end,
+ case Unsuitable of
+ true ->
+ [L|unsuitable_1(Bs)];
+ false ->
+ unsuitable_1(Bs)
+ end;
+unsuitable_1([{_,#b_blk{}}|Bs]) ->
+ unsuitable_1(Bs);
+unsuitable_1([]) -> [].
+
+unsuitable_recv([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs], Blocks, Predecessors) ->
+ Ls = case Op of
+ remove_message ->
+ unsuitable_loop(L, Blocks, Predecessors);
+ recv_next ->
+ unsuitable_loop(L, Blocks, Predecessors);
+ _ ->
+ []
+ end,
+ Ls ++ unsuitable_recv(Bs, Blocks, Predecessors);
+unsuitable_recv([_|Bs], Blocks, Predecessors) ->
+ unsuitable_recv(Bs, Blocks, Predecessors);
+unsuitable_recv([], _, _) -> [].
+
+unsuitable_loop(L, Blocks, Predecessors) ->
+ unsuitable_loop(L, Blocks, Predecessors, []).
+
+unsuitable_loop(L, Blocks, Predecessors, Acc) ->
+ Ps = maps:get(L, Predecessors),
+ unsuitable_loop_1(Ps, Blocks, Predecessors, Acc).
+
+unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) ->
+ case maps:get(P, Blocks) of
+ #b_blk{is=[#b_set{op=peek_message}|_]} ->
+ unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0);
+ #b_blk{} ->
+ case ordsets:is_element(P, Acc0) of
+ false ->
+ Acc1 = ordsets:add_element(P, Acc0),
+ Acc = unsuitable_loop(P, Blocks, Predecessors, Acc1),
+ unsuitable_loop_1(Ps, Blocks, Predecessors, Acc);
+ true ->
+ unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0)
+ end
+ end;
+unsuitable_loop_1([], _, _, Acc) -> Acc.
+
+%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs, Dominators) ->
+%% [{Variable,NewDefinitionBlock}]
+%% Calculate new locations for get_tuple_element instructions. For each
+%% variable, the new location is a block that dominates all uses of
+%% variable and as near to the uses of as possible. If no such block
+%% distinct from the block where the instruction currently is, the
+%% variable will not be included in the result list.
+
+new_def_locations([{V,UsedIn}|Vs], Defs, Dom) ->
+ DefIn = maps:get(V, Defs),
+ case common_dom(UsedIn, DefIn, Dom) of
+ [] ->
+ new_def_locations(Vs, Defs, Dom);
+ [_|_]=BetterDef ->
+ L = most_dominated(BetterDef, Dom),
+ [{V,L}|new_def_locations(Vs, Defs, Dom)]
+ end;
+new_def_locations([], _, _) -> [].
+
+common_dom([L|Ls], DefIn, Dom) ->
+ DomBy0 = maps:get(L, Dom),
+ DomBy = ordsets:subtract(DomBy0, maps:get(DefIn, Dom)),
+ common_dom_1(Ls, Dom, DomBy).
+
+common_dom_1(_, _, []) ->
+ [];
+common_dom_1([L|Ls], Dom, [_|_]=DomBy0) ->
+ DomBy1 = maps:get(L, Dom),
+ DomBy = ordsets:intersection(DomBy0, DomBy1),
+ common_dom_1(Ls, Dom, DomBy);
+common_dom_1([], _, DomBy) -> DomBy.
+
+most_dominated([L|Ls], Dom) ->
+ most_dominated(Ls, L, maps:get(L, Dom), Dom).
+
+most_dominated([L|Ls], L0, DomBy, Dom) ->
+ case member(L, DomBy) of
+ true ->
+ most_dominated(Ls, L0, DomBy, Dom);
+ false ->
+ most_dominated(Ls, L, maps:get(L, Dom), Dom)
+ end;
+most_dominated([], L, _, _) -> L.
+
+
+%% Move get_tuple_element instructions to their new locations.
+
+move_defs(V, From, To, Blocks) ->
+ #{From:=FromBlk0,To:=ToBlk0} = Blocks,
+ {Def,FromBlk} = remove_def(V, FromBlk0),
+ try insert_def(V, Def, ToBlk0) of
+ ToBlk ->
+ %%io:format("~p: ~p => ~p\n", [V,From,To]),
+ Blocks#{From:=FromBlk,To:=ToBlk}
+ catch
+ throw:not_possible ->
+ Blocks
+ end.
+
+remove_def(V, #b_blk{is=Is0}=Blk) ->
+ {Def,Is} = remove_def_is(Is0, V, []),
+ {Def,Blk#b_blk{is=Is}}.
+
+remove_def_is([#b_set{dst=#b_var{name=Dst}}=Def|Is], Dst, Acc) ->
+ {Def,reverse(Acc, Is)};
+remove_def_is([I|Is], Dst, Acc) ->
+ remove_def_is(Is, Dst, [I|Acc]).
+
+insert_def(V, Def, #b_blk{is=Is0}=Blk) ->
+ Is = insert_def_is(Is0, V, Def),
+ Blk#b_blk{is=Is}.
+
+insert_def_is([#b_set{op=phi}=I|Is], V, Def) ->
+ case member(V, beam_ssa:used(I)) of
+ true ->
+ throw(not_possible);
+ false ->
+ [I|insert_def_is(Is, V, Def)]
+ end;
+insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) ->
+ Action0 = case Op of
+ call -> beyond;
+ 'catch_end' -> beyond;
+ set_tuple_element -> beyond;
+ timeout -> beyond;
+ _ -> here
+ end,
+ Action = case Is of
+ [#b_set{op=succeeded}|_] -> here;
+ _ -> Action0
+ end,
+ case Action of
+ beyond ->
+ case member(V, beam_ssa:used(I)) of
+ true ->
+ %% The variable is used by this instruction. We must
+ %% place the definition before this instruction.
+ [Def|Is0];
+ false ->
+ %% Place it beyond the current instruction.
+ [I|insert_def_is(Is, V, Def)]
+ end;
+ here ->
+ [Def|Is0]
+ end;
+insert_def_is([], _V, Def) ->
+ [Def].
+
+
+%%%
+%%% Common utilities.
+%%%
+
+rel2fam(S0) ->
+ S1 = sofs:relation(S0),
+ S = sofs:rel2fam(S1),
+ sofs:to_external(S).
+
+sub(#b_set{op=phi,args=Args}=I, Sub) ->
+ I#b_set{args=[{sub_arg(A, Sub),P} || {A,P} <- Args]};
+sub(#b_set{args=Args}=I, Sub) ->
+ I#b_set{args=[sub_arg(A, Sub) || A <- Args]};
+sub(#b_br{bool=#b_var{}=Old}=Br, Sub) ->
+ New = sub_arg(Old, Sub),
+ Br#b_br{bool=New};
+sub(#b_switch{arg=#b_var{}=Old}=Sw, Sub) ->
+ New = sub_arg(Old, Sub),
+ Sw#b_switch{arg=New};
+sub(#b_ret{arg=#b_var{}=Old}=Ret, Sub) ->
+ New = sub_arg(Old, Sub),
+ Ret#b_ret{arg=New};
+sub(Last, _) -> Last.
+
+sub_arg(#b_remote{mod=Mod,name=Name}=Rem, Sub) ->
+ Rem#b_remote{mod=sub_arg(Mod, Sub),name=sub_arg(Name, Sub)};
+sub_arg(Old, Sub) ->
+ case Sub of
+ #{Old:=New} -> New;
+ #{} -> Old
+ end.
diff --git a/lib/compiler/src/beam_ssa_pp.erl b/lib/compiler/src/beam_ssa_pp.erl
new file mode 100644
index 0000000000..9daa2c2523
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_pp.erl
@@ -0,0 +1,238 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(beam_ssa_pp).
+
+-export([format_function/1,format_instr/1,format_var/1]).
+
+-include("beam_ssa.hrl").
+
+-spec format_function(beam_ssa:b_function()) -> iolist().
+
+format_function(#b_function{anno=Anno0,args=Args,
+ bs=Blocks,cnt=Counter}) ->
+ #{func_info:={M,F,_}} = Anno0,
+ Anno = maps:without([func_info,location,live_intervals,registers], Anno0),
+ FuncAnno = case Anno0 of
+ #{live_intervals:=Intervals} ->
+ Anno0#{live_intervals:=maps:from_list(Intervals)};
+ #{} ->
+ Anno0
+ end,
+ ReachableBlocks = beam_ssa:rpo(Blocks),
+ All = maps:keys(Blocks),
+ Unreachable = ordsets:subtract(ordsets:from_list(All),
+ ordsets:from_list(ReachableBlocks)),
+ [case Anno0 of
+ #{location:={Filename,Line}} ->
+ io_lib:format("%% ~ts:~p\n", [Filename,Line]);
+ #{} ->
+ []
+ end,
+ io_lib:format("%% Counter = ~p\n", [Counter]),
+ [format_anno(Key, Value) ||
+ {Key,Value} <- lists:sort(maps:to_list(Anno))],
+ io_lib:format("function ~p:~p(~ts) {\n", [M,F,format_args(Args, FuncAnno)]),
+ [format_live_interval(Var, FuncAnno) || Var <- Args],
+ format_blocks(ReachableBlocks, Blocks, FuncAnno),
+ case Unreachable of
+ [] ->
+ [];
+ [_|_] ->
+ ["\n%% Unreachable blocks\n\n",
+ format_blocks(Unreachable, Blocks, FuncAnno)]
+ end,
+
+ "}\n"].
+
+
+-spec format_instr(beam_ssa:b_set()) -> iolist().
+
+format_instr(#b_set{}=I) ->
+ Cs = lists:flatten(format_instr(I#b_set{anno=#{}}, #{}, true)),
+ string:trim(Cs, leading);
+format_instr(I0) ->
+ I = setelement(2, I0, #{}),
+ Cs = lists:flatten(format_terminator(I, #{})),
+ string:trim(Cs, both).
+
+-spec format_var(beam_ssa:b_var()) -> iolist().
+
+format_var(V) ->
+ Cs = lists:flatten(format_var(V, #{})),
+ string:trim(Cs, leading).
+
+%%%
+%%% Local functions.
+%%%
+
+format_anno(Key, Map) when is_map(Map) ->
+ Sorted = lists:sort(maps:to_list(Map)),
+ [io_lib:format("%% ~s:\n", [Key]),
+ [io_lib:format("%% ~w => ~w\n", [K,V]) || {K,V} <- Sorted]];
+format_anno(Key, Value) ->
+ io_lib:format("%% ~s: ~p\n", [Key,Value]).
+
+format_blocks(Ls, Blocks, Anno) ->
+ PP = [format_block(L, Blocks, Anno) || L <- Ls],
+ lists:join($\n, PP).
+
+format_block(L, Blocks, FuncAnno) ->
+ #b_blk{anno=Anno,is=Is,last=Last} = maps:get(L, Blocks),
+ [case map_size(Anno) of
+ 0 -> [];
+ _ -> io_lib:format("%% ~p\n", [Anno])
+ end,
+ io_lib:format("~p:", [L]),
+ format_instrs(Is, FuncAnno, true),
+ $\n,
+ format_terminator(Last, FuncAnno)].
+
+format_instrs([I|Is], FuncAnno, First) ->
+ [$\n,
+ format_instr(I, FuncAnno, First),
+ format_instrs(Is, FuncAnno, false)];
+format_instrs([], _FuncAnno, _First) ->
+ [].
+
+format_instr(#b_set{anno=Anno,op=Op,dst=Dst,args=Args},
+ FuncAnno, First) ->
+ AnnoStr = format_anno(Anno),
+ LiveIntervalStr = format_live_interval(Dst, FuncAnno),
+ [if
+ First ->
+ [];
+ AnnoStr =/= []; LiveIntervalStr =/= [] ->
+ $\n;
+ true ->
+ []
+ end,
+ AnnoStr,
+ LiveIntervalStr,
+ io_lib:format(" ~s~ts = ~ts", [format_i_number(Anno),
+ format_var(Dst, FuncAnno),
+ format_op(Op)]),
+ case Args of
+ [] ->
+ [];
+ [_|_] ->
+ io_lib:format(" ~ts", [format_args(Args, FuncAnno)])
+ end].
+
+format_i_number(#{n:=N}) ->
+ io_lib:format("[~p] ", [N]);
+format_i_number(#{}) -> [].
+
+format_terminator(#b_br{anno=A,bool=#b_literal{val=true},succ=Lbl}, _) ->
+ io_lib:format(" ~sbr label ~p\n", [format_i_number(A),Lbl]);
+format_terminator(#b_br{anno=A,bool=#b_literal{val=false},fail=Lbl}, _) ->
+ io_lib:format(" ~sbr label ~p\n", [format_i_number(A),Lbl]);
+format_terminator(#b_br{anno=A,bool=Bool,succ=Succ,fail=Fail}, FuncAnno) ->
+ io_lib:format(" ~sbr ~ts, label ~p, label ~p\n",
+ [format_i_number(A),format_arg(Bool, FuncAnno),Succ,Fail]);
+format_terminator(#b_switch{anno=A,arg=Arg,fail=Fail,list=List}, FuncAnno) ->
+ io_lib:format(" ~sswitch ~ts, label ~p, ~ts\n",
+ [format_i_number(A),format_arg(Arg, FuncAnno),Fail,
+ format_list(List,FuncAnno)]);
+format_terminator(#b_ret{anno=A,arg=Arg}, FuncAnno) ->
+ io_lib:format(" ~sret ~ts\n", [format_i_number(A),format_arg(Arg, FuncAnno)]).
+
+format_op({Prefix,Name}) ->
+ io_lib:format("~p:~p", [Prefix,Name]);
+format_op(Name) ->
+ io_lib:format("~p", [Name]).
+
+format_register(#b_var{name=V}, #{registers:=Regs}) ->
+ {Tag,N} = maps:get(V, Regs),
+ io_lib:format("~p~p", [Tag,N]);
+format_register(_, #{}) -> "".
+
+format_var(Var, FuncAnno) ->
+ VarString = format_var_1(Var),
+ case format_register(Var, FuncAnno) of
+ [] -> VarString;
+ [_|_]=Reg -> [Reg,$/,VarString]
+ end.
+
+format_var_1(#b_var{name={Name,Uniq}}) ->
+ if
+ is_atom(Name) ->
+ io_lib:format("~ts:~p", [Name,Uniq]);
+ is_integer(Name) ->
+ io_lib:format("_~p:~p", [Name,Uniq])
+ end;
+format_var_1(#b_var{name=Name}) when is_atom(Name) ->
+ atom_to_list(Name);
+format_var_1(#b_var{name=Name}) when is_integer(Name) ->
+ "_"++integer_to_list(Name).
+
+format_args(Args, FuncAnno) ->
+ Ss = [format_arg(Arg, FuncAnno) || Arg <- Args],
+ lists:join(", ", Ss).
+
+format_arg(#b_var{}=Arg, FuncAnno) ->
+ format_var(Arg, FuncAnno);
+format_arg(#b_literal{val=Val}, _FuncAnno) ->
+ io_lib:format("literal ~p", [Val]);
+format_arg(#b_remote{mod=Mod,name=Name,arity=Arity}, FuncAnno) ->
+ io_lib:format("remote (~ts):(~ts)/~p",
+ [format_arg(Mod, FuncAnno),format_arg(Name, FuncAnno),Arity]);
+format_arg(#b_local{name=Name,arity=Arity}, FuncAnno) ->
+ io_lib:format("local ~ts/~p", [format_arg(Name, FuncAnno),Arity]);
+format_arg({Value,Label}, FuncAnno) when is_integer(Label) ->
+ io_lib:format("{ ~ts, ~p }", [format_arg(Value, FuncAnno),Label]);
+format_arg(Other, _) ->
+ io_lib:format("*** ~p ***", [Other]).
+
+format_list(List, FuncAnno) ->
+ Ss = [io_lib:format("{ ~ts, ~ts }", [format_arg(Val, FuncAnno),format_label(L)]) ||
+ {Val,L} <- List],
+ io_lib:format("[ ~ts ]", [lists:join(", ", Ss)]).
+
+format_label(L) ->
+ ["label ",integer_to_list(L)].
+
+format_anno(#{n:=_}=Anno) ->
+ format_anno(maps:remove(n, Anno));
+format_anno(#{location:={File,Line}}=Anno0) ->
+ Anno = maps:remove(location, Anno0),
+ [io_lib:format(" %% ~ts:~p\n", [File,Line])|format_anno_1(Anno)];
+format_anno(Anno) ->
+ format_anno_1(Anno).
+
+format_anno_1(Anno) ->
+ case map_size(Anno) of
+ 0 ->
+ [];
+ _ ->
+ [io_lib:format(" %% Anno: ~p\n", [Anno])]
+ end.
+
+format_live_interval(#b_var{name=V}=Dst, #{live_intervals:=Intervals}) ->
+ case Intervals of
+ #{V:=Rs0} ->
+ Rs1 = [io_lib:format("~p..~p", [Start,End]) ||
+ {Start,End} <- Rs0],
+ Rs = lists:join(" ", Rs1),
+ io_lib:format(" %% ~ts: ~s\n", [format_var_1(Dst),Rs]);
+ #{} ->
+ []
+ end;
+format_live_interval(_, _) -> [].
+
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
new file mode 100644
index 0000000000..bbc3739eb5
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -0,0 +1,2437 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose: Prepare for code generation, including register allocation.
+%%
+%% The output of this compiler pass is still in the SSA format, but
+%% it has been annotated and transformed to help the code generator.
+%%
+%% * Some instructions are translated to other instructions closer to
+%% the BEAM instructions. For example, the put_tuple instruction is
+%% broken apart into the put_tuple_arity and put_tuple_elements
+%% instructions. Similary, the binary matching instructions are
+%% transformed from the optimization-friendly internal format to
+%% instruction more similar to the actual BEAM instructions.
+%%
+%% * Blocks that will need an instruction for allocating a stack frame
+%% are annotated with a {frame_size,Size} annotation.
+%%
+%% * 'copy' instructions are added for all variables that need
+%% to be saved to the stack frame. Additional 'copy' instructions
+%% can be added as an optimization to reuse y registers (see
+%% the copy_retval sub pass).
+%%
+%% * Each function is annotated with a {register,RegisterMap}
+%% annotation that maps each variable to a BEAM register. The linear
+%% scan algorithm is used to allocate registers.
+%%
+%% There are four kind of registers. x, y, fr (floating point register),
+%% and z. A variable will be allocated to a z register if it is only
+%% used by the instruction following the instruction that defines the
+%% the variable. The code generator will typically combine those
+%% instructions to a test instruction. z registers are also used for
+%% some instructions that don't have a return value.
+%%
+%% References:
+%%
+%% [1] H. Mössenböck and M. Pfeiffer. Linear scan register allocation
+%% in the context of SSA form and register constraints. In Proceedings
+%% of the International Conference on Compiler Construction, pages
+%% 229–246. LNCS 2304, Springer-Verlag, 2002.
+%%
+%% [2] C. Wimmer and H. Mössenböck. Optimized interval splitting in a
+%% linear scan register allocator. In Proceedings of the ACM/USENIX
+%% International Conference on Virtual Execution Environments, pages
+%% 132–141. ACM Press, 2005.
+%%
+%% [3] C. Wimmer and M. Franz. Linear Scan Register Allocation on SSA
+%% Form. In Proceedings of the International Symposium on Code
+%% Generation and Optimization, pages 170-179. ACM Press, 2010.
+%%
+
+-module(beam_ssa_pre_codegen).
+
+-export([module/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [all/2,any/2,append/1,duplicate/2,
+ foldl/3,last/1,map/2,member/2,partition/2,
+ reverse/1,reverse/2,sort/1,zip/2]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+ {'ok',beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, Opts) ->
+ ExtraAnnos = proplists:get_bool(dprecg, Opts),
+ Ps = passes(ExtraAnnos),
+ Fs = functions(Fs0, Ps),
+ {ok,Module#b_module{body=Fs}}.
+
+functions([F|Fs], Ps) ->
+ [function(F, Ps)|functions(Fs, Ps)];
+functions([], _Ps) -> [].
+
+-type b_var() :: beam_ssa:b_var().
+-type var_name() :: beam_ssa:var_name().
+-type instr_number() :: pos_integer().
+-type range() :: {instr_number(),instr_number()}.
+-type reg_num() :: beam_asm:reg_num().
+-type xreg() :: {'x',reg_num()}.
+-type yreg() :: {'y',reg_num()}.
+-type ypool() :: {'y',beam_ssa:label()}.
+-type reservation() :: 'fr' | {'prefer',xreg()} | 'x' | {'x',xreg()} |
+ ypool() | {yreg(),ypool()} | 'z'.
+-type ssa_register() :: beam_ssa_codegen:ssa_register().
+
+-define(TC(Body), tc(fun() -> Body end, ?FILE, ?LINE)).
+-record(st, {ssa :: beam_ssa:block_map(),
+ args :: [b_var()],
+ cnt :: beam_ssa:label(),
+ frames=[] :: [beam_ssa:label()],
+ intervals=[] :: [{b_var(),[range()]}],
+ aliases=[] :: [{b_var(),b_var()}],
+ res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
+ regs=#{} :: #{b_var():=ssa_register()},
+ extra_annos=[] :: [{atom(),term()}]
+ }).
+-define(PASS(N), {N,fun N/1}).
+
+passes(ExtraAnnos) ->
+ Ps = [?PASS(assert_no_critical_edges),
+
+ %% Preliminaries.
+ ?PASS(fix_bs),
+ ?PASS(sanitize),
+ ?PASS(fix_tuples),
+ ?PASS(place_frames),
+ ?PASS(fix_receives),
+
+ %% Find and reserve Y registers.
+ ?PASS(find_yregs),
+ ?PASS(reserve_yregs),
+
+ %% Improve reuse of Y registers to potentially
+ %% reduce the size of the stack frame.
+ ?PASS(copy_retval),
+ ?PASS(opt_get_list),
+
+ %% Calculate live intervals.
+ ?PASS(number_instructions),
+ ?PASS(live_intervals),
+ ?PASS(remove_unsuitable_aliases),
+ ?PASS(reserve_regs),
+ ?PASS(merge_intervals),
+
+ %% If needed for a .precg file, save the live intervals
+ %% so they can be included in an annotation.
+ case ExtraAnnos of
+ false -> ignore;
+ true -> ?PASS(save_live_intervals)
+ end,
+
+ %% Allocate registers.
+ ?PASS(linear_scan),
+ ?PASS(fix_aliased_regs),
+ ?PASS(frame_size),
+ ?PASS(turn_yregs)],
+ case ExtraAnnos of
+ false -> [P || P <- Ps, P =/= ignore];
+ true -> Ps
+ end.
+
+function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0, Ps) ->
+ try
+ St0 = #st{ssa=Blocks0,args=Args,cnt=Count0},
+ St = compile:run_sub_passes(Ps, St0),
+ #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St,
+ F1 = add_extra_annos(F0, ExtraAnnos),
+ F = beam_ssa:add_anno(registers, Regs, F1),
+ F#b_function{bs=Blocks,cnt=Count}
+ catch
+ Class:Error:Stack ->
+ #{func_info:={_,Name,Arity}} = Anno,
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
+
+save_live_intervals(#st{intervals=Intervals}=St) ->
+ St#st{extra_annos=[{live_intervals,Intervals}]}.
+
+fix_aliased_regs(#st{aliases=Aliases,regs=Regs}=St) ->
+ St#st{regs=fix_aliased_regs(Aliases, Regs)}.
+
+fix_aliased_regs([{Alias,V}|Aliases], Regs) ->
+ #{V:=Reg} = Regs,
+ fix_aliased_regs(Aliases, Regs#{Alias=>Reg});
+fix_aliased_regs([], Regs) -> Regs.
+
+%% Add extra annotations when a .precg listing file is being produced.
+add_extra_annos(F, Annos) ->
+ foldl(fun({Name,Value}, Acc) ->
+ beam_ssa:add_anno(Name, Value, Acc)
+ end, F, Annos).
+
+%% assert_no_critical_edges(St0) -> St.
+%% The code generator will not work if there are critial edges.
+%% Abort if any critical edges are found.
+
+assert_no_critical_edges(#st{ssa=Blocks}=St) ->
+ F = fun assert_no_ces/3,
+ beam_ssa:fold_rpo(F, Blocks, Blocks),
+ St.
+
+assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) ->
+ %% This block has multiple predecessors. Make sure that none
+ %% of the precessors have more than one successor.
+ true = all(fun({_,P}) ->
+ length(beam_ssa:successors(P, Blocks)) =:= 1
+ end, Phis), %Assertion.
+ Blocks;
+assert_no_ces(_, _, Blocks) -> Blocks.
+
+%% fix_bs(St0) -> St.
+%% Fix up the binary matching instructions:
+%%
+%% * Insert bs_save and bs_restore instructions where needed.
+%%
+%% * Combine bs_match and bs_extract instructions to bs_get
+%% instructions.
+
+fix_bs(#st{ssa=Blocks,cnt=Count0}=St) ->
+ F = fun(#b_set{op=bs_start_match,dst=Dst}, A) ->
+ %% Mark the root of the match context list.
+ [{Dst,{context,Dst}}|A];
+ (#b_set{op=bs_match,dst=Dst,args=[_,ParentCtx|_]}, A) ->
+ %% Link this match context the previous match context.
+ [{Dst,ParentCtx}|A];
+ (_, A) ->
+ A
+ end,
+ case beam_ssa:fold_instrs_rpo(F, [0], [],Blocks) of
+ [] ->
+ %% No binary matching in this function.
+ St;
+ [_|_]=M ->
+ CtxChain = maps:from_list(M),
+ Linear0 = beam_ssa:linearize(Blocks),
+
+ %% Insert bs_save / bs_restore instructions where needed.
+ {Linear1,Count} = bs_save_restore(Linear0, CtxChain, Count0),
+
+ %% Rename instructions.
+ Linear = bs_instrs(Linear1, CtxChain, []),
+
+ St#st{ssa=maps:from_list(Linear),cnt=Count}
+ end.
+
+
+%% Insert bs_save and bs_restore instructions as needed.
+
+bs_save_restore(Linear0, CtxChain, Count0) ->
+ Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
+ Rs = maps:values(Rs0),
+ S0 = sofs:relation(Rs, [{context,save_point}]),
+ S1 = sofs:relation_to_family(S0),
+ S = sofs:to_external(S1),
+ Slots = make_save_point_dict(S, []),
+ {Saves,Count1} = make_save_map(Rs, Slots, Count0, []),
+ {Restores,Count} = make_restore_map(maps:to_list(Rs0), Slots, Count1, []),
+
+ %% Now insert all saves and restores.
+ {bs_insert(Linear0, Saves, Restores, Slots),Count}.
+
+make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) ->
+ Ignored = #b_var{name={'@ssa_ignored',Count}},
+ case make_slot(Ps, Slots) of
+ #b_literal{val=start} ->
+ make_save_map(T, Slots, Count, Acc);
+ Slot ->
+ I = #b_set{op=bs_save,dst=Ignored,args=[Ctx,Slot]},
+ make_save_map(T, Slots, Count+1, [{Save,I}|Acc])
+ end;
+make_save_map([], _, Count, Acc) ->
+ {maps:from_list(Acc),Count}.
+
+make_restore_map([{Bef,{Ctx,_}=Ps}|T], Slots, Count, Acc) ->
+ Ignored = #b_var{name={'@ssa_ignored',Count}},
+ I = #b_set{op=bs_restore,dst=Ignored,args=[Ctx,make_slot(Ps, Slots)]},
+ make_restore_map(T, Slots, Count+1, [{Bef,I}|Acc]);
+make_restore_map([], _, Count, Acc) ->
+ {maps:from_list(Acc),Count}.
+
+make_slot({Same,Same}, _Slots) ->
+ #b_literal{val=start};
+make_slot({_,_}=Ps, Slots) ->
+ #b_literal{val=maps:get(Ps, Slots)}.
+
+make_save_point_dict([{Ctx,Pts}|T], Acc0) ->
+ Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0),
+ make_save_point_dict(T, Acc);
+make_save_point_dict([], Acc) ->
+ maps:from_list(Acc).
+
+make_save_point_dict_1([Ctx|T], Ctx, I, Acc) ->
+ %% Special {atom,start} save point. Does not need a
+ %% bs_save instruction.
+ make_save_point_dict_1(T, Ctx, I, Acc);
+make_save_point_dict_1([H|T], Ctx, I, Acc) ->
+ make_save_point_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
+make_save_point_dict_1([], Ctx, I, Acc) ->
+ [{Ctx,I}|Acc].
+
+bs_restores([{L,#b_blk{is=Is,last=Last}}|Bs], CtxChain, D0, Rs0) ->
+ FPos = case D0 of
+ #{L:=Pos0} -> Pos0;
+ #{} -> #{}
+ end,
+ {SPos,Rs} = bs_restores_is(Is, CtxChain, FPos, Rs0),
+ D = bs_update_successors(Last, SPos, FPos, D0),
+ bs_restores(Bs, CtxChain, D, Rs);
+bs_restores([], _, _, Rs) -> Rs.
+
+bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) ->
+ join_positions([{Succ,SPos},{Fail,FPos}], D);
+bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, FPos, D) ->
+ SPos = FPos, %Assertion.
+ Update = [{L,SPos} || {_,L} <- List] ++ [{Fail,SPos}],
+ join_positions(Update, D);
+bs_update_successors(#b_ret{}, _, _, D) -> D.
+
+join_positions([{L,MapPos0}|T], D) ->
+ case D of
+ #{L:=MapPos0} ->
+ %% Same map.
+ join_positions(T, D);
+ #{L:=MapPos1} ->
+ %% Different maps.
+ MapPos = join_positions_1(MapPos0, MapPos1),
+ join_positions(T, D#{L:=MapPos});
+ #{} ->
+ join_positions(T, D#{L=>MapPos0})
+ end;
+join_positions([], D) -> D.
+
+join_positions_1(MapPos0, MapPos1) ->
+ MapPos2 = maps:map(fun(Start, Pos) ->
+ case MapPos0 of
+ #{Start:=Pos} -> Pos;
+ #{Start:=_} -> unknown;
+ #{} -> Pos
+ end
+ end, MapPos1),
+ maps:merge(MapPos0, MapPos2).
+
+bs_restores_is([#b_set{op=bs_start_match,dst=Start}|Is],
+ CtxChain, PosMap0, Rs) ->
+ PosMap = PosMap0#{Start=>Start},
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=bs_match,dst=NewPos,args=Args}=I|Is],
+ CtxChain, PosMap0, Rs0) ->
+ Start = bs_subst_ctx(NewPos, CtxChain),
+ [_,FromPos|_] = Args,
+ case PosMap0 of
+ #{Start:=FromPos} ->
+ %% Same position, no restore needed.
+ PosMap = case bs_match_type(I) of
+ plain ->
+ %% Update position to new position.
+ PosMap0#{Start:=NewPos};
+ _ ->
+ %% Position will not change (test_unit
+ %% instruction or no instruction at
+ %% all).
+ PosMap0#{Start:=FromPos}
+ end,
+ bs_restores_is(Is, CtxChain, PosMap, Rs0);
+ #{Start:=_} ->
+ %% Different positions, might need a restore instruction.
+ case bs_match_type(I) of
+ none ->
+ %% The tail test will be optimized away.
+ %% No need to do a restore.
+ PosMap = PosMap0#{Start:=FromPos},
+ bs_restores_is(Is, CtxChain, PosMap, Rs0);
+ test_unit ->
+ %% This match instruction will be replaced by
+ %% a test_unit instruction. We will need a
+ %% restore. The new position will be the position
+ %% restored to (NOT NewPos).
+ PosMap = PosMap0#{Start:=FromPos},
+ Rs = Rs0#{NewPos=>{Start,FromPos}},
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+ plain ->
+ %% Match or skip. Position will be changed.
+ PosMap = PosMap0#{Start:=NewPos},
+ Rs = Rs0#{NewPos=>{Start,FromPos}},
+ bs_restores_is(Is, CtxChain, PosMap, Rs)
+ end
+ end;
+bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is],
+ CtxChain, PosMap, Rs) ->
+ Start = bs_subst_ctx(FromPos, CtxChain),
+ #{Start:=FromPos} = PosMap, %Assertion.
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is],
+ CtxChain, PosMap0, Rs0)
+ when Op =:= bs_test_tail;
+ Op =:= call ->
+ {Rs,PosMap} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([_|Is], CtxChain, PosMap, Rs) ->
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([], _CtxChain, PosMap, Rs) ->
+ {PosMap,Rs}.
+
+
+bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
+ #b_literal{val=binary},_Flags,
+ #b_literal{val=all},#b_literal{val=U}]}) ->
+ case U of
+ 1 -> none;
+ _ -> test_unit
+ end;
+bs_match_type(_) ->
+ plain.
+
+bs_restore_args([#b_var{}=Arg|Args], PosMap0, CtxChain, Dst, Rs0) ->
+ Start = bs_subst_ctx(Arg, CtxChain),
+ case PosMap0 of
+ #{Start:=Arg} ->
+ %% Same position, no restore needed.
+ bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0);
+ #{Start:=_} ->
+ %% Different positions, need a restore instruction.
+ PosMap = PosMap0#{Start:=Arg},
+ Rs = Rs0#{Dst=>{Start,Arg}},
+ bs_restore_args(Args, PosMap, CtxChain, Dst, Rs);
+ #{} ->
+ %% Not a match context.
+ bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0)
+ end;
+bs_restore_args([_|Args], PosMap, CtxChain, Dst, Rs) ->
+ bs_restore_args(Args, PosMap, CtxChain, Dst, Rs);
+bs_restore_args([], PosMap, _CtxChain, _Dst, Rs) ->
+ {Rs,PosMap}.
+
+%% Insert all bs_save and bs_restore instructions.
+
+bs_insert([{L,#b_blk{is=Is0}=Blk}|Bs0], Saves, Restores, Slots) ->
+ Is = bs_insert_is_1(Is0, Restores, Slots),
+ Bs = bs_insert_saves(Is, Bs0, Saves),
+ [{L,Blk#b_blk{is=Is}}|bs_insert(Bs, Saves, Restores, Slots)];
+bs_insert([], _, _, _) -> [].
+
+bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is], Restores, Slots) ->
+ if
+ Op =:= bs_test_tail;
+ Op =:= bs_match;
+ Op =:= call ->
+ Rs = case Restores of
+ #{Dst:=R} -> [R];
+ #{} -> []
+ end,
+ Rs ++ [I0|bs_insert_is_1(Is, Restores, Slots)];
+ Op =:= bs_start_match ->
+ NumSlots = case Slots of
+ #{Dst:=NumSlots0} -> NumSlots0;
+ #{} -> 0
+ end,
+ I = beam_ssa:add_anno(num_slots, NumSlots, I0),
+ [I|bs_insert_is_1(Is, Restores, Slots)];
+ true ->
+ [I0|bs_insert_is_1(Is, Restores, Slots)]
+ end;
+bs_insert_is_1([], _, _) -> [].
+
+bs_insert_saves([#b_set{dst=Dst}|Is], Bs, Saves) ->
+ case Saves of
+ #{Dst:=S} ->
+ bs_insert_save(S, Bs);
+ #{} ->
+ bs_insert_saves(Is, Bs, Saves)
+ end;
+bs_insert_saves([], Bs, _) -> Bs.
+
+bs_insert_save(Save, [{L,#b_blk{is=Is0}=Blk}|Bs]) ->
+ Is = case Is0 of
+ [#b_set{op=bs_extract}=Ex|Is1] ->
+ [Ex,Save|Is1];
+ _ ->
+ [Save|Is0]
+ end,
+ [{L,Blk#b_blk{is=Is}}|Bs].
+
+%% Translate bs_match instructions to bs_get, bs_match_string,
+%% or bs_skip. Also rename match context variables to use the
+%% variable assigned to by the start_match instruction.
+
+bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) ->
+ case bs_instrs_is(Is0, CtxChain, []) of
+ [#b_set{op=bs_extract,dst=Dst,args=[Ctx]}|Is] ->
+ %% Drop this instruction. Rewrite the corresponding
+ %% bs_match instruction in the previous block to
+ %% a bs_get instruction.
+ Acc = bs_combine(Dst, Ctx, Acc0),
+ bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc]);
+ Is ->
+ bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc0])
+ end;
+bs_instrs([], _, Acc) ->
+ reverse(Acc).
+
+bs_instrs_is([#b_set{op=Op,args=Args0}=I0|Is], CtxChain, Acc) ->
+ Args = [bs_subst_ctx(A, CtxChain) || A <- Args0],
+ I1 = I0#b_set{args=Args},
+ I = case {Op,Args} of
+ {bs_match,[#b_literal{val=skip},Ctx,Type|As]} ->
+ I1#b_set{op=bs_skip,args=[Type,Ctx|As]};
+ {bs_match,[#b_literal{val=string},Ctx|As]} ->
+ I1#b_set{op=bs_match_string,args=[Ctx|As]};
+ {_,_} ->
+ I1
+ end,
+ bs_instrs_is(Is, CtxChain, [I|Acc]);
+bs_instrs_is([], _, Acc) ->
+ reverse(Acc).
+
+%% Combine a bs_match instruction with the destination register
+%% taken from a bs_extract instruction.
+
+bs_combine(Dst, Ctx, [{L,#b_blk{is=Is0}=Blk}|Acc]) ->
+ [#b_set{}=Succeeded,
+ #b_set{op=bs_match,args=[Type,_|As]}=BsMatch|Is1] = reverse(Is0),
+ Is = reverse(Is1, [BsMatch#b_set{op=bs_get,dst=Dst,args=[Type,Ctx|As]},
+ Succeeded#b_set{args=[Dst]}]),
+ [{L,Blk#b_blk{is=Is}}|Acc].
+
+bs_subst_ctx(#b_var{}=Var, CtxChain) ->
+ case CtxChain of
+ #{Var:={context,Ctx}} ->
+ Ctx;
+ #{Var:=ParentCtx} ->
+ bs_subst_ctx(ParentCtx, CtxChain);
+ #{} ->
+ %% Not a match context variable.
+ Var
+ end;
+bs_subst_ctx(Other, _CtxChain) ->
+ Other.
+
+%% sanitize(St0) -> St.
+%% Remove constructs that can cause problems later:
+%%
+%% * Unreachable blocks may cause problems for determination of
+%% dominators.
+%%
+%% * Some instructions (such as get_hd) don't accept literal
+%% arguments. Evaluate the instructions and remove them.
+
+sanitize(#st{ssa=Blocks0,cnt=Count0}=St) ->
+ Ls = beam_ssa:rpo(Blocks0),
+ {Blocks,Count} = sanitize(Ls, Count0, Blocks0, #{}),
+ St#st{ssa=Blocks,cnt=Count}.
+
+sanitize([L|Ls], Count0, Blocks0, Values0) ->
+ #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks0),
+ case sanitize_is(Is0, Count0, Values0, false, []) of
+ no_change ->
+ sanitize(Ls, Count0, Blocks0, Values0);
+ {Is,Count,Values} ->
+ Blk = Blk0#b_blk{is=Is},
+ Blocks = Blocks0#{L:=Blk},
+ sanitize(Ls, Count, Blocks, Values)
+ end;
+sanitize([], Count, Blocks0, Values) ->
+ Blocks = if
+ map_size(Values) =:= 0 ->
+ Blocks0;
+ true ->
+ beam_ssa:rename_vars(Values, [0], Blocks0)
+ end,
+
+ %% Unreachable blocks can cause problems for the dominator calculations.
+ Ls = beam_ssa:rpo(Blocks),
+ Reachable = gb_sets:from_list(Ls),
+ {case map_size(Blocks) =:= gb_sets:size(Reachable) of
+ true -> Blocks;
+ false -> remove_unreachable(Ls, Blocks, Reachable, [])
+ end,Count}.
+
+sanitize_is([#b_set{op=get_map_element,
+ args=[#b_literal{}=Map,Key]}=I0|Is],
+ Count0, Values, _Changed, Acc) ->
+ {MapVarName,Count} = new_var_name('@ssa_map', Count0),
+ MapVar = #b_var{name=MapVarName},
+ I = I0#b_set{args=[MapVar,Key]},
+ Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
+ sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
+sanitize_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args0}=I0|Is0],
+ Count, Values, Changed, Acc) ->
+ Args = map(fun(#b_var{name=V}=Var) ->
+ case Values of
+ #{V:=New} -> New;
+ #{} -> Var
+ end;
+ (Lit) -> Lit
+ end, Args0),
+ case sanitize_instr(Op, Args, I0) of
+ {value,Value0} ->
+ Value = #b_literal{val=Value0},
+ sanitize_is(Is0, Count, Values#{Dst=>Value}, true, Acc);
+ {ok,I} ->
+ sanitize_is(Is0, Count, Values, true, [I|Acc]);
+ ok ->
+ sanitize_is(Is0, Count, Values, Changed, [I0|Acc])
+ end;
+sanitize_is([], Count, Values, Changed, Acc) ->
+ case Changed of
+ true ->
+ {reverse(Acc),Count,Values};
+ false ->
+ no_change
+ end.
+
+sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
+ case erl_bifs:is_pure(erlang, Bif, 1) of
+ false ->
+ ok;
+ true ->
+ try
+ {value,erlang:Bif(Lit)}
+ catch
+ error:_ ->
+ ok
+ end
+ end;
+sanitize_instr({bif,Bif}, [#b_literal{val=Lit1},#b_literal{val=Lit2}], _I) ->
+ true = erl_bifs:is_pure(erlang, Bif, 2), %Assertion.
+ try
+ {value,erlang:Bif(Lit1, Lit2)}
+ catch
+ error:_ ->
+ ok
+ end;
+sanitize_instr(get_hd, [#b_literal{val=[Hd|_]}], _I) ->
+ {value,Hd};
+sanitize_instr(get_tl, [#b_literal{val=[_|Tl]}], _I) ->
+ {value,Tl};
+sanitize_instr(get_tuple_element, [#b_literal{val=T},
+ #b_literal{val=I}], _I)
+ when I < tuple_size(T) ->
+ {value,element(I+1, T)};
+sanitize_instr(is_nonempty_list, [#b_literal{val=Lit}], _I) ->
+ {value,case Lit of
+ [_|_] -> true;
+ _ -> false
+ end};
+sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple},
+ #b_literal{val=Arity},
+ #b_literal{val=Tag}], _I)
+ when is_integer(Arity), is_atom(Tag) ->
+ if
+ tuple_size(Tuple) =:= Arity, element(1, Tuple) =:= Tag ->
+ {value,true};
+ true ->
+ {value,false}
+ end;
+sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], I0) ->
+ if
+ is_integer(Sz), Sz >= 0 -> ok;
+ true -> {ok,sanitize_badarg(I0)}
+ end;
+sanitize_instr(bs_init, [#b_literal{val=append},_,#b_literal{val=Sz}|_], I0) ->
+ if
+ is_integer(Sz), Sz >= 0 -> ok;
+ true -> {ok,sanitize_badarg(I0)}
+ end;
+sanitize_instr(succeeded, [#b_literal{}], _I) ->
+ {value,true};
+sanitize_instr(_, _, _) -> ok.
+
+sanitize_badarg(I) ->
+ Func = #b_remote{mod=#b_literal{val=erlang},
+ name=#b_literal{val=error},arity=1},
+ I#b_set{op=call,args=[Func,#b_literal{val=badarg}]}.
+
+remove_unreachable([L|Ls], Blocks, Reachable, Acc) ->
+ #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks),
+ case split_phis(Is0) of
+ {[_|_]=Phis,Rest} ->
+ Is = [prune_phi(Phi, Reachable) || Phi <- Phis] ++ Rest,
+ Blk = Blk0#b_blk{is=Is},
+ remove_unreachable(Ls, Blocks, Reachable, [{L,Blk}|Acc]);
+ {[],_} ->
+ remove_unreachable(Ls, Blocks, Reachable, [{L,Blk0}|Acc])
+ end;
+remove_unreachable([], _Blocks, _, Acc) ->
+ maps:from_list(Acc).
+
+prune_phi(#b_set{args=Args0}=Phi, Reachable) ->
+ Args = [A || {_,Pred}=A <- Args0,
+ gb_sets:is_element(Pred, Reachable)],
+ Phi#b_set{args=Args}.
+
+%%%
+%%% Fix tuples.
+%%%
+
+%% fix_tuples(St0) -> St.
+%% We must split tuple creation into two instruction to mirror the
+%% the way tuples are created in BEAM. Each put_tuple instruction is
+%% split into put_tuple_arity followed by put_tuple_elements.
+
+fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
+ F = fun (#b_set{op=put_tuple,args=Args}=Put, C0) ->
+ Arity = #b_literal{val=length(Args)},
+ {VarName,C} = new_var_name('@ssa_ignore', C0),
+ Ignore = #b_var{name=VarName},
+ {[Put#b_set{op=put_tuple_arity,args=[Arity]},
+ #b_set{dst=Ignore,op=put_tuple_elements,args=Args}],C};
+ (I, C) -> {[I],C}
+ end,
+ {Blocks,Count} = beam_ssa:flatmapfold_instrs_rpo(F, [0], Count0, Blocks0),
+ St#st{ssa=Blocks,cnt=Count}.
+
+%%%
+%%% Find out where frames should be placed.
+%%%
+
+%% place_frames(St0) -> St.
+%% Return a list of the labels for the blocks that need stack frame
+%% allocation instructions.
+%%
+%% This function attempts to place stack frames as tight as possible
+%% around the code, to avoid building stack frames for code paths
+%% that don't need one.
+%%
+%% Stack frames are placed in blocks that dominate all of their
+%% descendants. That guarantees that the deallocation instructions
+%% cannot be reached from other execution paths that didn't set up
+%% a stack frame or set up a stack frame with a different size.
+
+place_frames(#st{ssa=Blocks}=St) ->
+ Doms = beam_ssa:dominators(Blocks),
+ Ls = beam_ssa:rpo(Blocks),
+ Tried = gb_sets:empty(),
+ Frames0 = [],
+ {Frames,_} = place_frames_1(Ls, Blocks, Doms, Tried, Frames0),
+ St#st{frames=Frames}.
+
+place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) ->
+ Blk = maps:get(L, Blocks),
+ case need_frame(Blk) of
+ true ->
+ %% This block needs a frame. Try to place it here.
+ {Frames,Tried} = do_place_frame(L, Blocks, Doms, Tried0, Frames0),
+
+ %% Successfully placed. Try to place more frames in descendants
+ %% that are not dominated by this block.
+ place_frames_1(Ls, Blocks, Doms, Tried, Frames);
+ false ->
+ try
+ place_frames_1(Ls, Blocks, Doms, Tried0, Frames0)
+ catch
+ throw:{need_frame,For,Tried1}=Reason ->
+ %% An descendant block needs a stack frame. Try to
+ %% place it here.
+ case is_dominated_by(For, L, Doms) of
+ true ->
+ %% Try to place a frame here.
+ {Frames,Tried} = do_place_frame(L, Blocks, Doms,
+ Tried1, Frames0),
+ place_frames_1(Ls, Blocks, Doms, Tried, Frames);
+ false ->
+ %% Wrong place. This block does not dominate
+ %% the block that needs the frame. Pass it on
+ %% to our ancestors.
+ throw(Reason)
+ end
+ end
+ end;
+place_frames_1([], _, _, Tried, Frames) ->
+ {Frames,Tried}.
+
+%% do_place_frame(Label, Blocks, Dominators, Tried0, Frames0) -> {Frames,Tried}.
+%% Try to place a frame in this block. This function returns
+%% successfully if it either succeds at placing a frame in this
+%% block, if an ancestor that dominates this block has already placed
+%% a frame, or if we have already tried to put a frame in this block.
+%%
+%% An {need_frame,Label,Tried} exception will be thrown if this block
+%% block is not suitable for having a stack frame (i.e. it does not dominate
+%% all of its descendants). The exception means that an ancestor will have to
+%% place the frame needed by this block.
+
+do_place_frame(L, Blocks, Doms, Tried0, Frames) ->
+ case gb_sets:is_element(L, Tried0) of
+ true ->
+ %% We have already tried to put a frame in this block.
+ {Frames,Tried0};
+ false ->
+ %% Try to place a frame in this block.
+ Tried = gb_sets:insert(L, Tried0),
+ case place_frame_here(L, Blocks, Doms, Frames) of
+ yes ->
+ %% We need a frame and it is safe to place it here.
+ {[L|Frames],Tried};
+ no ->
+ %% An ancestor has a frame. Not needed.
+ {Frames,Tried};
+ ancestor ->
+ %% This block does not dominate all of its
+ %% descendants. We must place the frame in
+ %% an ancestor.
+ throw({need_frame,L,Tried})
+ end
+ end.
+
+%% place_frame_here(Label, Blocks, Doms, Frames) -> no|yes|ancestor.
+%% Determine whether a frame should be placed in block Label.
+
+place_frame_here(L, Blocks, Doms, Frames) ->
+ B0 = any(fun(DomBy) ->
+ is_dominated_by(L, DomBy, Doms)
+ end, Frames),
+ case B0 of
+ true ->
+ %% This block is dominated by an ancestor block that
+ %% defines a frame. Not needed/allowed to put a frame
+ %% here.
+ no;
+ false ->
+ %% No frame in any ancestor. We need a frame.
+ %% Now check whether the frame can be placed here.
+ %% If this block dominates all of its descendants
+ %% and the predecessors of any phi nodes it can be
+ %% placed here.
+ Descendants = beam_ssa:rpo([L], Blocks),
+ PhiPredecessors = phi_predecessors(L, Blocks),
+ MustDominate = ordsets:from_list(PhiPredecessors ++ Descendants),
+ Dominates = all(fun(?BADARG_BLOCK) ->
+ %% This block defines no variables and calls
+ %% erlang:error(badarg). It does not matter
+ %% whether L dominates ?BADARG_BLOCK or not;
+ %% it is still safe to put the frame in L.
+ true;
+ (Bl) ->
+ is_dominated_by(Bl, L, Doms)
+ end, MustDominate),
+
+ %% Also, this block must not be a loop header.
+ IsLoopHeader = is_loop_header(L, Blocks),
+ case Dominates andalso not IsLoopHeader of
+ true -> yes;
+ false -> ancestor
+ end
+ end.
+
+%% phi_predecessors(Label, Blocks) ->
+%% Return all predecessors referenced in phi nodes.
+
+phi_predecessors(L, Blocks) ->
+ #b_blk{is=Is} = maps:get(L, Blocks),
+ [P || #b_set{op=phi,args=Args} <- Is, {_,P} <- Args].
+
+%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false.
+%% Test whether block Label is dominated by block DominatedBy.
+
+is_dominated_by(L, DomBy, Doms) ->
+ DominatedBy = maps:get(L, Doms),
+ ordsets:is_element(DomBy, DominatedBy).
+
+%% need_frame(#b_blk{}) -> true|false.
+%% Test whether any of the instructions in the block requires a stack frame.
+
+need_frame(#b_blk{is=Is,last=#b_ret{arg=Ret}}) ->
+ need_frame_1(Is, {return,Ret});
+need_frame(#b_blk{is=Is}) ->
+ need_frame_1(Is, body).
+
+need_frame_1([#b_set{op=make_fun,dst=#b_var{name=Fun}}|Is], {return,_}=Context) ->
+ %% Since make_fun clobbers X registers, a stack frame is needed if
+ %% any of the following instructions use any other variable than
+ %% the one holding the reference to the created fun.
+ need_frame_1(Is, Context) orelse
+ case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=#b_var{name=Fun}}}) of
+ [Fun] -> false;
+ [_|_] -> true
+ end;
+need_frame_1([#b_set{op=new_try_tag}|_], _) ->
+ true;
+need_frame_1([#b_set{op=call,dst=Val}]=Is, {return,Ret}) ->
+ if
+ Val =:= Ret -> need_frame_1(Is, tail);
+ true -> need_frame_1(Is, body)
+ end;
+need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
+ case Func of
+ #b_remote{mod=#b_literal{val=Mod},
+ name=#b_literal{val=Name},
+ arity=Arity} ->
+ case erl_bifs:is_exit_bif(Mod, Name, Arity) of
+ true ->
+ false;
+ false ->
+ Context =:= body orelse
+ Is =/= [] orelse
+ is_trap_bif(Mod, Name, Arity)
+ end;
+ #b_remote{} ->
+ %% This is an apply(), which always needs a frame.
+ true;
+ #b_var{} ->
+ %% A fun call always needs a frame.
+ true;
+ _ ->
+ Context =:= body orelse Is =/= []
+ end;
+need_frame_1([I|Is], Context) ->
+ beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
+need_frame_1([], _) -> false.
+
+%% is_trap_bif(Mod, Name, Arity) -> true|false.
+%% Test whether we need a stack frame for this BIF.
+
+is_trap_bif(erlang, '!', 2) -> true;
+is_trap_bif(erlang, link, 1) -> true;
+is_trap_bif(erlang, unlink, 1) -> true;
+is_trap_bif(erlang, monitor_node, 2) -> true;
+is_trap_bif(erlang, group_leader, 2) -> true;
+is_trap_bif(erlang, exit, 2) -> true;
+is_trap_bif(_, _, _) -> false.
+
+%%%
+%%% Fix variables used in matching in receive.
+%%%
+%%% The loop_rec/2 instruction may return a reference to a
+%%% message outside of any heap or heap fragment. If the message
+%%% does not match, it is not allowed to store any reference to
+%%% the message (or part of the message) on the stack. If we do,
+%%% the message will be corrupted if there happens to be a GC.
+%%%
+%%% Here we make sure to introduce copies of variables that are
+%%% matched out and subsequently used after the remove_message/0
+%%% instructions. That will make sure that only X registers are
+%%% used during matching.
+%%%
+%%% Depending on where variables are defined and used, they must
+%%% be handling in two different ways.
+%%%
+%%% Variables that are always defined in the receive (before branching
+%%% out into the different clauses of the receive) and used after the
+%%% receive, must be handled in the following way: Before each
+%%% remove_message instruction, each such variable must be copied, and
+%%% all variables must be consolidated using a phi node in the
+%%% common exit block for the receive.
+%%%
+%%% Variables that are matched out and used in the same clause
+%%% need copy instructions before the remove_message instruction
+%%% in that clause.
+%%%
+
+fix_receives(#st{ssa=Blocks0,cnt=Count0}=St) ->
+ {Blocks,Count} = fix_receives_1(maps:to_list(Blocks0),
+ Blocks0, Count0),
+ St#st{ssa=Blocks,cnt=Count}.
+
+fix_receives_1([{L,Blk}|Ls], Blocks0, Count0) ->
+ case Blk of
+ #b_blk{is=[#b_set{op=peek_message}|_]} ->
+ Rm = find_rm_blocks(L, Blocks0),
+ LoopExit = find_loop_exit(Rm, Blocks0),
+ Defs0 = beam_ssa:def([L], Blocks0),
+ CommonUsed = recv_common(Defs0, LoopExit, Blocks0),
+ {Blocks1,Count1} = recv_fix_common(CommonUsed, LoopExit, Rm,
+ Blocks0, Count0),
+ Defs = ordsets:subtract(Defs0, CommonUsed),
+ {Blocks,Count} = fix_receive(Rm, Defs, Blocks1, Count1),
+ fix_receives_1(Ls, Blocks, Count);
+ #b_blk{} ->
+ fix_receives_1(Ls, Blocks0, Count0)
+ end;
+fix_receives_1([], Blocks, Count) ->
+ {Blocks,Count}.
+
+recv_common(_Defs, none, _Blocks) ->
+ %% There is no common exit block because receive is used
+ %% in the tail position of a function.
+ [];
+recv_common(Defs, Exit, Blocks) ->
+ {ExitDefs,ExitUsed} = beam_ssa:def_used([Exit], Blocks),
+ Def = ordsets:subtract(Defs, ExitDefs),
+ ordsets:intersection(Def, ExitUsed).
+
+%% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
+%% Blocks0, Count0) -> {Blocks,Count}.
+%% Handle variables alwys defined in a receive and used
+%% in the exit block following the receive.
+
+recv_fix_common([Msg0|T], Exit, Rm, Blocks0, Count0) ->
+ {Msg1,Count1} = new_var_name('@recv', Count0),
+ Msg = #b_var{name=Msg1},
+ Blocks1 = beam_ssa:rename_vars(#{Msg0=>Msg}, [Exit], Blocks0),
+ N = length(Rm),
+ {MsgVars0,Count} = new_var_names(duplicate(N, '@recv'), Count1),
+ MsgVars = [#b_var{name=V} || V <- MsgVars0],
+ PhiArgs = fix_exit_phi_args(MsgVars, Rm, Exit, Blocks1),
+ Phi = #b_set{op=phi,dst=Msg,args=PhiArgs},
+ ExitBlk0 = maps:get(Exit, Blocks1),
+ ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitBlk0#b_blk.is]},
+ Blocks2 = Blocks1#{Exit:=ExitBlk},
+ Blocks = recv_fix_common_1(MsgVars, Rm, Msg0, Blocks2),
+ recv_fix_common(T, Exit, Rm, Blocks, Count);
+recv_fix_common([], _, _, Blocks, Count) ->
+ {Blocks,Count}.
+
+recv_fix_common_1([V|Vs], [Rm|Rms], Msg, Blocks0) ->
+ Ren = #{Msg=>V},
+ Blocks1 = beam_ssa:rename_vars(Ren, [Rm], Blocks0),
+ #b_blk{is=Is0} = Blk0 = maps:get(Rm, Blocks1),
+ Copy = #b_set{op=copy,dst=V,args=[#b_var{name=Msg}]},
+ Is = insert_after_phis(Is0, [Copy]),
+ Blk = Blk0#b_blk{is=Is},
+ Blocks = Blocks1#{Rm:=Blk},
+ recv_fix_common_1(Vs, Rms, Msg, Blocks);
+recv_fix_common_1([], [], _Msg, Blocks) -> Blocks.
+
+fix_exit_phi_args([V|Vs], [Rm|Rms], Exit, Blocks) ->
+ Path = beam_ssa:rpo([Rm], Blocks),
+ Pred = exit_predecessor(Path, Exit),
+ [{V,Pred}|fix_exit_phi_args(Vs, Rms, Exit, Blocks)];
+fix_exit_phi_args([], [], _, _) -> [].
+
+exit_predecessor([Pred,Exit|_], Exit) ->
+ Pred;
+exit_predecessor([_|Bs], Exit) ->
+ exit_predecessor(Bs, Exit).
+
+%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}.
+%% Add a copy instruction for all variables that are matched out and
+%% later used within a clause of the receive.
+
+fix_receive([L|Ls], Defs, Blocks0, Count0) ->
+ {RmDefs,Used0} = beam_ssa:def_used([L], Blocks0),
+ Def = ordsets:subtract(Defs, RmDefs),
+ Used = ordsets:intersection(Def, Used0),
+ {NewVs,Count} = new_var_names(Used, Count0),
+ NewVars = [#b_var{name=V} || V <- NewVs],
+ Ren = zip(Used, NewVars),
+ Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
+ #b_blk{is=Is0} = Blk1 = maps:get(L, Blocks1),
+ CopyIs = [#b_set{op=copy,dst=New,args=[#b_var{name=Old}]} ||
+ {Old,New} <- Ren],
+ Is = insert_after_phis(Is0, CopyIs),
+ Blk = Blk1#b_blk{is=Is},
+ Blocks = maps:put(L, Blk, Blocks1),
+ fix_receive(Ls, Defs, Blocks, Count);
+fix_receive([], _Defs, Blocks, Count) ->
+ {Blocks,Count}.
+
+%% find_loop_exit([Label], Blocks) -> Label | none.
+%% Find the block to which control is transferred when the
+%% the receive loop is exited.
+
+find_loop_exit([L1,L2|_Ls], Blocks) ->
+ Path1 = beam_ssa:rpo([L1], Blocks),
+ Path2 = beam_ssa:rpo([L2], Blocks),
+ find_loop_exit_1(reverse(Path1), reverse(Path2), none);
+find_loop_exit(_, _) -> none.
+
+find_loop_exit_1([H|T1], [H|T2], _) ->
+ find_loop_exit_1(T1, T2, H);
+find_loop_exit_1(_, _, Exit) -> Exit.
+
+%% find_rm_blocks(StartLabel, Blocks) -> [Label].
+%% Find all blocks that start with remove_message within the receive
+%% loop whose peek_message label is StartLabel.
+
+find_rm_blocks(L, Blocks) ->
+ Seen = gb_sets:singleton(L),
+ Blk = maps:get(L, Blocks),
+ Succ = beam_ssa:successors(Blk),
+ find_rm_blocks_1(Succ, Seen, Blocks).
+
+find_rm_blocks_1([L|Ls], Seen0, Blocks) ->
+ case gb_sets:is_member(L, Seen0) of
+ true ->
+ find_rm_blocks_1(Ls, Seen0, Blocks);
+ false ->
+ Seen = gb_sets:insert(L, Seen0),
+ Blk = maps:get(L, Blocks),
+ case find_rm_act(Blk#b_blk.is) of
+ prune ->
+ %% Looping back. Don't look at any successors.
+ find_rm_blocks_1(Ls, Seen, Blocks);
+ continue ->
+ %% Neutral block. Do nothing here, but look at
+ %% all successors.
+ Succ = beam_ssa:successors(Blk),
+ find_rm_blocks_1(Succ++Ls, Seen, Blocks);
+ found ->
+ %% Found remove_message instruction.
+ [L|find_rm_blocks_1(Ls, Seen, Blocks)]
+ end
+ end;
+find_rm_blocks_1([], _, _) -> [].
+
+find_rm_act([#b_set{op=Op}|Is]) ->
+ case Op of
+ remove_message -> found;
+ peek_message -> prune;
+ recv_next -> prune;
+ wait_timeout -> prune;
+ wait -> prune;
+ _ -> find_rm_act(Is)
+ end;
+find_rm_act([]) ->
+ continue.
+
+%%%
+%%% Find out which variables need to be stored in Y registers.
+%%%
+
+-record(dk, {d :: ordsets:ordset(var_name()),
+ k :: ordsets:ordset(var_name())
+ }).
+
+%% find_yregs(St0) -> St.
+%% Find all variables that must be stored in Y registers. Annotate
+%% the blocks that allocate frames with the set of Y registers
+%% used within that stack frame.
+%%
+%% Basically, we following all execution paths starting from a block
+%% that allocates a frame, keeping track of of all defined registers
+%% and all registers killed by an instruction that clobbers X
+%% registers. For every use of a variable, we check if if it is in
+%% the set of killed variables; if it is, it must be stored in an Y
+%% register.
+
+find_yregs(#st{frames=[]}=St) ->
+ St;
+find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) ->
+ FrameDefs = find_defs(Frames, Blocks0, [V || #b_var{name=V} <- Args]),
+ Blocks = find_yregs_1(FrameDefs, Blocks0),
+ St#st{ssa=Blocks}.
+
+find_yregs_1([{F,Defs}|Fs], Blocks0) ->
+ DK = #dk{d=Defs,k=[]},
+ D0 = #{F=>DK},
+ Ls = beam_ssa:rpo([F], Blocks0),
+ Yregs0 = [],
+ Yregs = find_yregs_2(Ls, Blocks0, D0, Yregs0),
+ Blk0 = maps:get(F, Blocks0),
+ Blk = beam_ssa:add_anno(yregs, Yregs, Blk0),
+ Blocks = Blocks0#{F:=Blk},
+ find_yregs_1(Fs, Blocks);
+find_yregs_1([], Blocks) -> Blocks.
+
+find_yregs_2([L|Ls], Blocks0, D0, Yregs0) ->
+ Blk0 = maps:get(L, Blocks0),
+ #b_blk{is=Is,last=Last} = Blk0,
+ Ys0 = maps:get(L, D0),
+ {Yregs1,Ys} = find_yregs_is(Is, Ys0, Yregs0),
+ Yregs = find_yregs_terminator(Last, Ys, Yregs1),
+ Successors = beam_ssa:successors(Blk0),
+ D = find_update_succ(Successors, Ys, D0),
+ find_yregs_2(Ls, Blocks0, D, Yregs);
+find_yregs_2([], _Blocks, _D, Yregs) -> Yregs.
+
+find_defs(Frames, Blocks, Defs) ->
+ Seen = gb_sets:empty(),
+ FramesSet = gb_sets:from_list(Frames),
+ {FrameDefs,_} = find_defs_1([0], Blocks, FramesSet, Seen, Defs, []),
+ FrameDefs.
+
+find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
+ case gb_sets:is_member(L, Frames) of
+ true ->
+ OrderedDefs = ordsets:from_list(Defs0),
+ find_defs_1(Ls, Blocks, Frames, Seen0, Defs0,
+ [{L,OrderedDefs}|Acc0]);
+ false ->
+ case gb_sets:is_member(L, Seen0) of
+ true ->
+ find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, Acc0);
+ false ->
+ Seen1 = gb_sets:insert(L, Seen0),
+ {Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0),
+ #b_blk{is=Is} = Blk = maps:get(L, Blocks),
+ Defs = find_defs_is(Is, Defs0),
+ Successors = beam_ssa:successors(Blk),
+ find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc)
+ end
+ end;
+find_defs_1([], _, _, Seen, _, Acc) ->
+ {Acc,Seen}.
+
+find_defs_is([#b_set{dst=#b_var{name=Dst}}|Is], Acc) ->
+ find_defs_is(Is, [Dst|Acc]);
+find_defs_is([], Acc) -> Acc.
+
+find_update_succ([S|Ss], #dk{d=Defs0,k=Killed0}=DK0, D0) ->
+ case D0 of
+ #{S:=#dk{d=Defs1,k=Killed1}} ->
+ Defs = ordsets:intersection(Defs0, Defs1),
+ Killed = ordsets:union(Killed0, Killed1),
+ DK = #dk{d=Defs,k=Killed},
+ D = maps:put(S, DK, D0),
+ find_update_succ(Ss, DK0, D);
+ #{} ->
+ D = maps:put(S, DK0, D0),
+ find_update_succ(Ss, DK0, D)
+ end;
+find_update_succ([], _, D) -> D.
+
+find_yregs_is([#b_set{dst=#b_var{name=Dst}}=I|Is], #dk{d=Defs0,k=Killed0}=Ys, Yregs0) ->
+ Used = beam_ssa:used(I),
+ Yregs1 = ordsets:intersection(Used, Killed0),
+ Yregs = ordsets:union(Yregs0, Yregs1),
+ case beam_ssa:clobbers_xregs(I) of
+ false ->
+ Defs = ordsets:add_element(Dst, Defs0),
+ find_yregs_is(Is, Ys#dk{d=Defs}, Yregs);
+ true ->
+ Killed = ordsets:union(Defs0, Killed0),
+ Defs = [Dst],
+ find_yregs_is(Is, Ys#dk{d=Defs,k=Killed}, Yregs)
+ end;
+find_yregs_is([], Ys, Yregs) -> {Yregs,Ys}.
+
+find_yregs_terminator(Terminator, #dk{k=Killed}, Yregs0) ->
+ Used = beam_ssa:used(Terminator),
+ Yregs = ordsets:intersection(Used, Killed),
+ ordsets:union(Yregs0, Yregs).
+
+%%%
+%%% Try to reduce the size of the stack frame, by adding an explicit
+%%% 'copy' instructions for return values from 'call' and 'make_fun' that
+%%% need to be saved in Y registers. Here is an example to show
+%%% how that's useful. First, here is the Erlang code:
+%%%
+%%% f(Pid) ->
+%%% Res = foo(42),
+%%% _ = node(Pid),
+%%% bar(),
+%%% Res.
+%%%
+%%% Compiled to SSA format, the main part of the code looks like this:
+%%%
+%%% 0:
+%%% Res = call local literal foo/1, literal 42
+%%% _1 = bif:node Pid
+%%% @ssa_bool = succeeded _1
+%%% br @ssa_bool, label 3, label 1
+%%% 3:
+%%% @ssa_ignored = call local literal bar/0
+%%% ret Res
+%%%
+%%% It can be seen that the variables Pid and Res must be saved in Y
+%%% registers in order to survive the function calls. A previous sub
+%%% pass has inserted a 'copy' instruction to save the value of the
+%%% variable Pid:
+%%%
+%%% 0:
+%%% Pid:4 = copy Pid
+%%% Res = call local literal foo/1, literal 42
+%%% _1 = bif:node Pid:4
+%%% @ssa_bool = succeeded _1
+%%% br @ssa_bool, label 3, label 1
+%%%
+%%% 3:
+%%% @ssa_ignored = call local literal bar/0
+%%% ret Res
+%%%
+%%% The Res and Pid:4 variables must be assigned to different Y registers
+%%% because they are live at the same time. copy_retval() inserts a
+%%% 'copy' instruction to copy Res to a new variable:
+%%%
+%%% 0:
+%%% Pid:4 = copy Pid
+%%% Res:6 = call local literal foo/1, literal 42
+%%% _1 = bif:node Pid:4
+%%% @ssa_bool = succeeded _1
+%%% br @ssa_bool, label 3, label 1
+%%%
+%%% 3:
+%%% Res = copy Res:6
+%%% @ssa_ignored = call local literal bar/0
+%%% ret Res
+%%%
+%%% The new variable Res:6 is used to capture the return value from the call.
+%%% The variables Pid:4 and Res are no longer live at the same time, so they
+%%% can be assigned to the same Y register.
+%%%
+
+copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Count0}=St) ->
+ {Blocks,Count} = copy_retval_1(Frames, Blocks0, Count0),
+ St#st{ssa=Blocks,cnt=Count}.
+
+copy_retval_1([F|Fs], Blocks0, Count0) ->
+ #b_blk{anno=#{yregs:=Yregs0},is=Is} = maps:get(F, Blocks0),
+ Yregs1 = gb_sets:from_list(Yregs0),
+ Yregs = collect_yregs(Is, Yregs1),
+ Ls = beam_ssa:rpo([F], Blocks0),
+ {Blocks,Count} = copy_retval_2(Ls, Yregs, none, Blocks0, Count0),
+ copy_retval_1(Fs, Blocks, Count);
+copy_retval_1([], Blocks, Count) ->
+ {Blocks,Count}.
+
+collect_yregs([#b_set{op=copy,dst=#b_var{name=Y},args=[#b_var{name=X}]}|Is],
+ Yregs0) ->
+ true = gb_sets:is_member(X, Yregs0), %Assertion.
+ Yregs = gb_sets:insert(Y, gb_sets:delete(X, Yregs0)),
+ collect_yregs(Is, Yregs);
+collect_yregs([#b_set{}|Is], Yregs) ->
+ collect_yregs(Is, Yregs);
+collect_yregs([], Yregs) -> Yregs.
+
+copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
+ #b_blk{is=Is0,last=Last} = Blk = maps:get(L, Blocks0),
+ RC = case {Last,Ls} of
+ {#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} ->
+ true;
+ {_,_} ->
+ false
+ end,
+ case copy_retval_is(Is0, RC, Yregs, Copy0, Count0, []) of
+ {Is,Count} ->
+ case Copy0 =:= none andalso Count0 =:= Count of
+ true ->
+ copy_retval_2(Ls, Yregs, none, Blocks0, Count0);
+ false ->
+ Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
+ copy_retval_2(Ls, Yregs, none, Blocks, Count)
+ end;
+ {Is,Count,Copy} ->
+ Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
+ copy_retval_2(Ls, Yregs, Copy, Blocks, Count)
+ end;
+copy_retval_2([], _Yregs, none, Blocks, Count) ->
+ {Blocks,Count}.
+
+copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
+ Copy, Count, Acc) ->
+ I = I0#b_set{args=copy_sub_args(Args0, Copy)},
+ {reverse(Acc, [I|acc_copy([], Copy)]),Count};
+copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
+ {reverse(Acc, acc_copy(Is, Copy)),Count};
+copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
+ {reverse(Acc, acc_copy(Is, Copy)),Count};
+copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetVal}=Dst}=I0|Is], RC, Yregs,
+ Copy0, Count0, Acc0) when Op =:= call; Op =:= make_fun ->
+ {I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0),
+ case gb_sets:is_member(RetVal, Yregs) of
+ true ->
+ {NewVarName,Count} = new_var_name(RetVal, Count1),
+ NewVar = #b_var{name=NewVarName},
+ Copy = #b_set{op=copy,dst=Dst,args=[NewVar]},
+ I = I1#b_set{dst=NewVar},
+ copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]);
+ false ->
+ copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc])
+ end;
+copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) ->
+ I = I0#b_set{args=copy_sub_args(Args0, Copy)},
+ case beam_ssa:clobbers_xregs(I) of
+ true ->
+ copy_retval_is(Is, RC, Yregs, none, Count, [I|acc_copy(Acc, Copy)]);
+ false ->
+ copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc])
+ end;
+copy_retval_is([], RC, _, Copy, Count, Acc) ->
+ case {Copy,RC} of
+ {none,_} ->
+ {reverse(Acc),Count};
+ {#b_set{},true} ->
+ {reverse(Acc),Count,Copy};
+ {#b_set{},false} ->
+ {reverse(Acc, [Copy]),Count}
+ end.
+
+%%
+%% Consider this code:
+%%
+%% Var = ...
+%% ...
+%% A1 = call foo/0
+%% A = copy A1
+%% B = call bar/1, Var
+%%
+%% If the Var variable is no longer used after this code, its Y register
+%% can't be reused for A. To allow the Y register to be reused
+%% we will need to insert 'copy' instructions for arguments that are
+%% in Y registers:
+%%
+%% Var = ...
+%% ...
+%% A1 = call foo/0
+%% Var1 = copy Var
+%% A = copy A1
+%% B = call bar/1, Var1
+%%
+
+place_retval_copy(I, _Yregs, none, Count, Acc) ->
+ {I,Count,Acc};
+place_retval_copy(#b_set{args=[F|Args0]}=I, Yregs, Copy, Count0, Acc0) ->
+ #b_set{dst=#b_var{name=Avoid}} = Copy,
+ {Args,Acc1,Count} = copy_func_args(Args0, Yregs, Avoid, Acc0, [], Count0),
+ Acc = [Copy|Acc1],
+ {I#b_set{args=[F|Args]},Count,Acc}.
+
+copy_func_args([#b_var{name=V}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) ->
+ case gb_sets:is_member(V, Yregs) of
+ true when V =/= Avoid ->
+ {NewVarName,Count} = new_var_name(V, Count0),
+ NewVar = #b_var{name=NewVarName},
+ Copy = #b_set{op=copy,dst=NewVar,args=[A]},
+ copy_func_args(As, Yregs, Avoid, [Copy|CopyAcc], [NewVar|Acc], Count);
+ _ ->
+ copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count0)
+ end;
+copy_func_args([A|As], Yregs, Avoid, CopyAcc, Acc, Count) ->
+ copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count);
+copy_func_args([], _Yregs, _Avoid, CopyAcc, Acc, Count) ->
+ {reverse(Acc),CopyAcc,Count}.
+
+acc_copy(Acc, none) -> Acc;
+acc_copy(Acc, #b_set{}=Copy) -> [Copy|Acc].
+
+copy_sub_args(Args, none) ->
+ Args;
+copy_sub_args(Args, #b_set{dst=Dst,args=[Src]}) ->
+ [sub_arg(A, Dst, Src) || A <- Args].
+
+sub_arg(Old, Old, New) -> New;
+sub_arg(Old, _, _) -> Old.
+
+%%%
+%%% Consider:
+%%%
+%%% x1/Hd = get_hd x0/Cons
+%%% y0/Tl = get_tl x0/Cons
+%%%
+%%% Register x0 can't be reused for Hd. If Hd needs to be in x0,
+%%% a 'move' instruction must be inserted.
+%%%
+%%% If we swap get_hd and get_tl when Tl is in a Y register,
+%%% x0 can be used for Hd if Cons is not used again:
+%%%
+%%% y0/Tl = get_tl x0/Cons
+%%% x0/Hd = get_hd x0/Cons
+%%%
+
+opt_get_list(#st{ssa=Blocks,res=Res}=St) ->
+ ResMap = maps:from_list(Res),
+ Ls = beam_ssa:rpo(Blocks),
+ St#st{ssa=opt_get_list_1(Ls, ResMap, Blocks)}.
+
+opt_get_list_1([L|Ls], Res, Blocks0) ->
+ #b_blk{is=Is0} = Blk = maps:get(L, Blocks0),
+ case opt_get_list_is(Is0, Res, [], false) of
+ no ->
+ opt_get_list_1(Ls, Res, Blocks0);
+ {yes,Is} ->
+ Blocks = Blocks0#{L:=Blk#b_blk{is=Is}},
+ opt_get_list_1(Ls, Res, Blocks)
+ end;
+opt_get_list_1([], _, Blocks) -> Blocks.
+
+opt_get_list_is([#b_set{op=get_hd,dst=#b_var{name=Hd},
+ args=[Cons]}=GetHd,
+ #b_set{op=get_tl,dst=#b_var{name=Tl},
+ args=[Cons]}=GetTl|Is],
+ Res, Acc, Changed) ->
+ %% Note that when this pass is run, only Y registers have
+ %% reservations. The absence of an entry for a variable therefore
+ %% means that the variable will be in an X register.
+ case Res of
+ #{Hd:={y,_}} ->
+ %% Hd will be in a Y register. Don't swap.
+ opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed);
+ #{Tl:={y,_}} ->
+ %% Tl will be in a Y register. Swap.
+ opt_get_list_is([GetHd|Is], Res, [GetTl|Acc], true);
+ #{} ->
+ %% Both are in X registers. Nothing to do.
+ opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed)
+ end;
+opt_get_list_is([I|Is], Res, Acc, Changed) ->
+ opt_get_list_is(Is, Res, [I|Acc], Changed);
+opt_get_list_is([], _Res, Acc, Changed) ->
+ case Changed of
+ true ->
+ {yes,reverse(Acc)};
+ false ->
+ no
+ end.
+
+%%%
+%%% Number instructions in the order they are executed.
+%%%
+
+%% number_instructions(St0) -> St.
+%% Number instructions in the order they are executed. Use a step
+%% size of 2. Don't number phi instructions. All phi variables in
+%% a block will be live one unit before the first non-phi instruction
+%% in the block.
+
+number_instructions(#st{ssa=Blocks0}=St) ->
+ Ls = beam_ssa:rpo(Blocks0),
+ St#st{ssa=number_is_1(Ls, 1, Blocks0)}.
+
+number_is_1([L|Ls], N0, Blocks0) ->
+ #b_blk{is=Is0,last=Last0} = Bl0 = maps:get(L, Blocks0),
+ {Is,N1} = number_is_2(Is0, N0, []),
+ Last = beam_ssa:add_anno(n, N1, Last0),
+ N = N1 + 2,
+ Bl = Bl0#b_blk{is=Is,last=Last},
+ Blocks = maps:put(L, Bl, Blocks0),
+ number_is_1(Ls, N, Blocks);
+number_is_1([], _, Blocks) -> Blocks.
+
+number_is_2([#b_set{op=phi}=I|Is], N, Acc) ->
+ number_is_2(Is, N, [I|Acc]);
+number_is_2([I0|Is], N, Acc) ->
+ I = beam_ssa:add_anno(n, N, I0),
+ number_is_2(Is, N+2, [I|Acc]);
+number_is_2([], N, Acc) ->
+ {reverse(Acc),N}.
+
+%%%
+%%% Calculate live intervals.
+%%%
+
+live_intervals(#st{args=Args,ssa=Blocks}=St) ->
+ Vars0 = [{V,{0,1}} || #b_var{name=V} <- Args],
+ F = fun(L, _, A) -> live_interval_blk(L, Blocks, A) end,
+ LiveMap0 = #{},
+ Acc0 = {[],[],LiveMap0},
+ {Vars,Aliases,_} = beam_ssa:fold_po(F, Acc0, Blocks),
+ Intervals = merge_ranges(rel2fam(Vars0++Vars)),
+ St#st{intervals=Intervals,aliases=Aliases}.
+
+merge_ranges([{V,Rs}|T]) ->
+ [{V,merge_ranges_1(Rs)}|merge_ranges(T)];
+merge_ranges([]) -> [].
+
+merge_ranges_1([{A,N},{N,Z}|Rs]) ->
+ merge_ranges_1([{A,Z}|Rs]);
+merge_ranges_1([R|Rs]) ->
+ [R|merge_ranges_1(Rs)];
+merge_ranges_1([]) -> [].
+
+live_interval_blk(L, Blocks, {Vars0,Aliases0,LiveMap0}) ->
+ Live0 = [],
+ Successors = beam_ssa:successors(L, Blocks),
+ Live1 = update_successors(Successors, L, Blocks, LiveMap0, Live0),
+
+ %% Add ranges for all variables that are live in the successors.
+ #b_blk{is=Is,last=Last} = maps:get(L, Blocks),
+ End = beam_ssa:get_anno(n, Last),
+ Use = [{V,{use,End+1}} || V <- Live1],
+
+ %% Determine used and defined variables in this block.
+ FirstNumber = first_number(Is, Last),
+ {UseDef0,Aliases} = live_interval_blk_1([Last|reverse(Is)],
+ FirstNumber, Aliases0, Use),
+ UseDef = rel2fam(UseDef0),
+
+ %% Update what is live at the beginning of this block and
+ %% store it.
+ Used = [V || {V,[{use,_}|_]} <- UseDef],
+ Live2 = ordsets:union(Live1, Used),
+ Killed = [V || {V,[{def,_}|_]} <- UseDef],
+ Live = ordsets:subtract(Live2, Killed),
+ LiveMap = LiveMap0#{L=>Live},
+
+ %% Construct the ranges for this block.
+ Vars = make_block_ranges(UseDef, FirstNumber, Vars0),
+ {Vars,Aliases,LiveMap}.
+
+make_block_ranges([{V,[{def,Def}]}|Vs], First, Acc) ->
+ make_block_ranges(Vs, First, [{V,{Def,Def}}|Acc]);
+make_block_ranges([{V,[{def,Def}|Uses]}|Vs], First, Acc) ->
+ {use,Last} = last(Uses),
+ make_block_ranges(Vs, First, [{V,{Def,Last}}|Acc]);
+make_block_ranges([{V,[{use,_}|_]=Uses}|Vs], First, Acc) ->
+ {use,Last} = last(Uses),
+ make_block_ranges(Vs, First, [{V,{First,Last}}|Acc]);
+make_block_ranges([], _, Acc) -> Acc.
+
+live_interval_blk_1([#b_set{op=phi,dst=#b_var{name=Dst}}|Is],
+ FirstNumber, Aliases, Acc0) ->
+ Acc = [{Dst,{def,FirstNumber}}|Acc0],
+ live_interval_blk_1(Is, FirstNumber, Aliases, Acc);
+live_interval_blk_1([#b_set{op=bs_start_match}=I|Is], FirstNumber,
+ Aliases0, Acc0) ->
+ N = beam_ssa:get_anno(n, I),
+ #b_set{dst=#b_var{name=Dst}} = I,
+ Acc1 = [{Dst,{def,N}}|Acc0],
+ Aliases = case beam_ssa:get_anno(reuse_for_context, I) of
+ true ->
+ #b_set{args=[#b_var{name=Src}]} = I,
+ [{Dst,Src}|Aliases0];
+ false ->
+ Aliases0
+ end,
+ Acc = [{V,{use,N}} || V <- beam_ssa:used(I)] ++ Acc1,
+ live_interval_blk_1(Is, FirstNumber, Aliases, Acc);
+live_interval_blk_1([I|Is], FirstNumber, Aliases, Acc0) ->
+ N = beam_ssa:get_anno(n, I),
+ Acc1 = case I of
+ #b_set{dst=#b_var{name=Dst}} ->
+ [{Dst,{def,N}}|Acc0];
+ _ ->
+ Acc0
+ end,
+ Used = beam_ssa:used(I),
+ Acc = [{V,{use,N}} || V <- Used] ++ Acc1,
+ live_interval_blk_1(Is, FirstNumber, Aliases, Acc);
+live_interval_blk_1([], _FirstNumber, Aliases, Acc) ->
+ {Acc,Aliases}.
+
+%% first_number([#b_set{}]) -> InstructionNumber.
+%% Return the number for the first instruction for the block.
+%% Note that this number is one less than the first
+%% non-phi instruction in the block.
+
+first_number([#b_set{op=phi}|Is], Last) ->
+ first_number(Is, Last);
+first_number([I|_], _) ->
+ beam_ssa:get_anno(n, I) - 1;
+first_number([], Last) ->
+ beam_ssa:get_anno(n, Last) - 1.
+
+update_successors([L|Ls], Pred, Blocks, LiveMap, Live0) ->
+ Live1 = ordsets:union(Live0, get_live(L, LiveMap)),
+ #b_blk{is=Is} = maps:get(L, Blocks),
+ Live = update_live_phis(Is, Pred, Live1),
+ update_successors(Ls, Pred, Blocks, LiveMap, Live);
+update_successors([], _, _, _, Live) -> Live.
+
+get_live(L, LiveMap) ->
+ case LiveMap of
+ #{L:=Live} -> Live;
+ #{} -> []
+ end.
+
+update_live_phis([#b_set{op=phi,dst=#b_var{name=Killed},args=Args}|Is],
+ Pred, Live0) ->
+ Used = [V || {#b_var{name=V},L} <- Args, L =:= Pred],
+ Live1 = ordsets:union(ordsets:from_list(Used), Live0),
+ Live = ordsets:del_element(Killed, Live1),
+ update_live_phis(Is, Pred, Live);
+update_live_phis(_, _, Live) -> Live.
+
+%%%
+%%% Reserve Y registers.
+%%%
+
+%% reserve_yregs(St0) -> St.
+%% In each block that allocates a stack frame, insert instructions
+%% that copy variables that must be in Y registers (given by
+%% YRegisters) to new variables.
+%%
+%% Also allocate specific Y registers for try and catch tags.
+%% The outermost try/catch tag is placed in y0, any directly
+%% nested tag in y1, and so on. Note that this is the reversed
+%% order as required by BEAM; it will be corrected later by
+%% turn_yregs().
+
+reserve_yregs(#st{frames=Frames}=St0) ->
+ foldl(fun reserve_yregs_1/2, St0, Frames).
+
+reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
+ Blk = maps:get(L, Blocks0),
+ Yregs = beam_ssa:get_anno(yregs, Blk),
+ {Def,Used} = beam_ssa:def_used([L], Blocks0),
+ UsedYregs = ordsets:intersection(Yregs, Used),
+ DefBefore = ordsets:subtract(UsedYregs, Def),
+ {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, Blocks0, Count0),
+ InsideVars = ordsets:subtract(UsedYregs, DefBefore),
+ ResTryTags0 = reserve_try_tags(L, Blocks),
+ ResTryTags = [{V,{Reg,Count}} || {V,Reg} <- ResTryTags0],
+ Vars = BeforeVars ++ InsideVars,
+ Res = [{V,{y,Count}} || V <- Vars] ++ ResTryTags ++ Res0,
+ St#st{res=Res,ssa=Blocks,cnt=Count+1}.
+
+reserve_try_tags(L, Blocks) ->
+ Seen = gb_sets:empty(),
+ {Res0,_} = reserve_try_tags_1([L], Blocks, Seen, #{}),
+ Res1 = [maps:to_list(M) || {_,M} <- maps:to_list(Res0)],
+ Res = [{V,{y,Y}} || {V,Y} <- append(Res1)],
+ ordsets:from_list(Res).
+
+reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) ->
+ case gb_sets:is_element(L, Seen0) of
+ true ->
+ reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0);
+ false ->
+ Seen1 = gb_sets:insert(L, Seen0),
+ #b_blk{is=Is} = Blk = maps:get(L, Blocks),
+ Active0 = get_active(L, ActMap0),
+ Active = reserve_try_tags_is(Is, Active0),
+ Successors = beam_ssa:successors(Blk),
+ ActMap1 = update_act_map(Successors, Active, ActMap0),
+ {ActMap,Seen} = reserve_try_tags_1(Ls, Blocks, Seen1, ActMap1),
+ reserve_try_tags_1(Successors, Blocks, Seen,ActMap)
+ end;
+reserve_try_tags_1([], _Blocks, Seen, ActMap) ->
+ {ActMap,Seen}.
+
+get_active(L, ActMap) ->
+ case ActMap of
+ #{L:=Active} -> Active;
+ #{} -> #{}
+ end.
+
+reserve_try_tags_is([#b_set{op=new_try_tag,dst=#b_var{name=V}}|Is], Active) ->
+ N = map_size(Active),
+ reserve_try_tags_is(Is, Active#{V=>N});
+reserve_try_tags_is([#b_set{op=kill_try_tag,args=[#b_var{name=Tag}]}|Is], Active) ->
+ reserve_try_tags_is(Is, maps:remove(Tag, Active));
+reserve_try_tags_is([_|Is], Active) ->
+ reserve_try_tags_is(Is, Active);
+reserve_try_tags_is([], Active) -> Active.
+
+update_act_map([L|Ls], Active0, ActMap0) ->
+ case ActMap0 of
+ #{L:=Active1} ->
+ ActMap = ActMap0#{L=>maps:merge(Active0, Active1)},
+ update_act_map(Ls, Active0, ActMap);
+ #{} ->
+ ActMap = ActMap0#{L=>Active0},
+ update_act_map(Ls, Active0, ActMap)
+ end;
+update_act_map([], _, ActMap) -> ActMap.
+
+rename_vars([], _, Blocks, Count) ->
+ {[],Blocks,Count};
+rename_vars(Vs, L, Blocks0, Count0) ->
+ {NewVs,Count} = new_var_names(Vs, Count0),
+ NewVars = [#b_var{name=V} || V <- NewVs],
+ Ren = zip(Vs, NewVars),
+ Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
+ #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks1),
+ CopyIs = [#b_set{op=copy,dst=New,args=[#b_var{name=Old}]} ||
+ {Old,New} <- Ren],
+ Is = insert_after_phis(Is0, CopyIs),
+ Blk = Blk0#b_blk{is=Is},
+ Blocks = maps:put(L, Blk, Blocks1),
+ {NewVs,Blocks,Count}.
+
+insert_after_phis([#b_set{op=phi}=I|Is], InsertIs) ->
+ [I|insert_after_phis(Is, InsertIs)];
+insert_after_phis(Is, InsertIs) ->
+ InsertIs ++ Is.
+
+%% frame_size(St0) -> St.
+%% Calculate the frame size for each block that allocates a frame.
+%% Annotate the block with the frame size. Also annotate all
+%% return instructions with {deallocate,FrameSize} to simplify
+%% code generation.
+
+frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) ->
+ Blocks = foldl(fun(L, Blks) ->
+ frame_size_1(L, Regs, Blks)
+ end, Blocks0, Frames),
+ St#st{ssa=Blocks}.
+
+frame_size_1(L, Regs, Blocks0) ->
+ Def = beam_ssa:def([L], Blocks0),
+ Yregs0 = [maps:get(V, Regs) || V <- Def, is_yreg(maps:get(V, Regs))],
+ Yregs = ordsets:from_list(Yregs0),
+ FrameSize = length(ordsets:from_list(Yregs)),
+ if
+ FrameSize =/= 0 ->
+ [{y,0}|_] = Yregs, %Assertion.
+ {y,Last} = last(Yregs),
+ Last = FrameSize - 1, %Assertion.
+ ok;
+ true ->
+ ok
+ end,
+ Blk0 = maps:get(L, Blocks0),
+ Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0),
+
+ %% Insert an annotation for frame deallocation on
+ %% each #b_ret{}.
+ Blocks = maps:put(L, Blk, Blocks0),
+ Reachable = beam_ssa:rpo([L], Blocks),
+ frame_deallocate(Reachable, FrameSize, Blocks).
+
+frame_deallocate([L|Ls], Size, Blocks0) ->
+ Blk0 = maps:get(L, Blocks0),
+ Blk = case Blk0 of
+ #b_blk{last=#b_ret{}=Ret0} ->
+ Ret = beam_ssa:add_anno(deallocate, Size, Ret0),
+ Blk0#b_blk{last=Ret};
+ #b_blk{} ->
+ Blk0
+ end,
+ Blocks = maps:put(L, Blk, Blocks0),
+ frame_deallocate(Ls, Size, Blocks);
+frame_deallocate([], _, Blocks) -> Blocks.
+
+
+%% turn_yregs(St0) -> St.
+%% Renumber y registers so that {y,0} becomes {y,FrameSize-1},
+%% {y,FrameSize-1} becomes {y,0} and so on. This is to make nested
+%% catches work. The register allocator (linear_scan()) has given
+%% a lower number to the outermost catch.
+
+turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
+ Regs1 = foldl(fun(L, A) ->
+ Blk = maps:get(L, Blocks),
+ FrameSize = beam_ssa:get_anno(frame_size, Blk),
+ Def = beam_ssa:def([L], Blocks),
+ [turn_yregs_1(Def, FrameSize, Regs0)|A]
+ end, [], Frames),
+ Regs = maps:merge(Regs0, maps:from_list(append(Regs1))),
+ St#st{regs=Regs}.
+
+turn_yregs_1(Def, FrameSize, Regs) ->
+ Yregs0 = [{maps:get(V, Regs),V} || V <- Def, is_yreg(maps:get(V, Regs))],
+ Yregs1 = rel2fam(Yregs0),
+ FrameSize = length(Yregs1),
+ Yregs2 = [{{y,FrameSize-Y-1},Vs} || {{y,Y},Vs} <- Yregs1],
+ R0 = sofs:family(Yregs2),
+ R1 = sofs:family_to_relation(R0),
+ R = sofs:converse(R1),
+ sofs:to_external(R).
+
+%%%
+%%% Reserving registers before register allocation.
+%%%
+
+%% reserve_regs(St0) -> St.
+%% Reserve registers prior to register allocation. Y registers
+%% have already been reserved. This function will reserve z,
+%% fr, and specific x registers.
+
+reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) ->
+ %% Reserve x0, x1, and so on for the function arguments.
+ Res1 = reserve_arg_regs(Args, 0, Res0),
+
+ %% Reserve Z registers (dummy registers) for instructions with no
+ %% return values (e.g. remove_message) or pseudo-return values
+ %% (e.g. landingpad).
+ Res2 = reserve_zregs(Blocks, Intervals, Res1),
+
+ %% Reserve float registers.
+ Res3 = reserve_fregs(Blocks, Res2),
+
+ %% Reserve all remaining unreserved variables as X registers.
+ Res = maps:from_list(Res3),
+ St#st{res=reserve_xregs(Blocks, Res)}.
+
+reserve_arg_regs([#b_var{name=Arg}|Is], N, Acc) ->
+ reserve_arg_regs(Is, N+1, [{Arg,{x,N}}|Acc]);
+reserve_arg_regs([], _, Acc) -> Acc.
+
+reserve_zregs(Blocks, Intervals, Res) ->
+ ShortLived0 = [V || {V,[{Start,End}]} <- Intervals, Start+2 =:= End],
+ ShortLived = cerl_sets:from_list(ShortLived0),
+ F = fun(_, #b_blk{is=Is,last=Last}, A) ->
+ reserve_zreg(Is, Last, ShortLived, A)
+ end,
+ beam_ssa:fold_rpo(F, [0], Res, Blocks).
+
+reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
+ #b_set{op={bif,'=:='},args=[Dst,Val]}], _Last, ShortLived, A0) ->
+ case Val of
+ #b_literal{val=Arity} when Arity bsr 32 =:= 0 ->
+ %% These two instructions can be combined to a test_arity
+ %% instruction provided that the arity variable is short-lived.
+ reserve_zreg_1(Dst, ShortLived, A0);
+ _ ->
+ A0
+ end;
+reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
+ #b_switch{}, ShortLived, A) ->
+ reserve_zreg_1(Dst, ShortLived, A);
+reserve_zreg([#b_set{op=Op,dst=#b_var{name=Dst}}|Is], Last, ShortLived, A0) ->
+ IsZReg = case Op of
+ context_to_binary -> true;
+ bs_match_string -> true;
+ bs_restore -> true;
+ bs_save -> true;
+ {float,clearerror} -> true;
+ kill_try_tag -> true;
+ landingpad -> true;
+ put_tuple_elements -> true;
+ remove_message -> true;
+ set_tuple_element -> true;
+ succeeded -> true;
+ timeout -> true;
+ wait_timeout -> true;
+ _ -> false
+ end,
+ A = case IsZReg of
+ true -> [{Dst,z}|A0];
+ false -> A0
+ end,
+ reserve_zreg(Is, Last, ShortLived, A);
+reserve_zreg([], #b_br{bool=Bool}, ShortLived, A) ->
+ reserve_zreg_1(Bool, ShortLived, A);
+reserve_zreg([], _, _, A) -> A.
+
+reserve_zreg_1(#b_var{name=V}, ShortLived, A) ->
+ case cerl_sets:is_element(V, ShortLived) of
+ true -> [{V,z}|A];
+ false -> A
+ end;
+reserve_zreg_1(#b_literal{}, _, A) -> A.
+
+reserve_fregs(Blocks, Res) ->
+ F = fun(_, #b_blk{is=Is}, A) ->
+ reserve_freg(Is, A)
+ end,
+ beam_ssa:fold_rpo(F, [0], Res, Blocks).
+
+reserve_freg([#b_set{op={float,Op},dst=#b_var{name=V}}|Is], Res) ->
+ case Op of
+ get ->
+ reserve_freg(Is, Res);
+ _ ->
+ reserve_freg(Is, [{V,fr}|Res])
+ end;
+reserve_freg([_|Is], Res) ->
+ reserve_freg(Is, Res);
+reserve_freg([], Res) -> Res.
+
+%% reserve_xregs(St0) -> St.
+%% Reserve all remaining variables as X registers.
+%%
+%% If a variable will need to be in a specific X register for a
+%% 'call' or 'make_fun' (and there is nothing that will kill it
+%% between the definition and use), reserve the register using a
+%% {prefer,{x,X} annotation. That annotation means that the linear
+%% scan algorithm will place the variable in the preferred register,
+%% unless that register is already occupied.
+%%
+%% All remaining variables are reserved as X registers. Linear scan
+%% will allocate the lowest free X register for the variable.
+
+reserve_xregs(Blocks, Res) ->
+ F = fun(L, #b_blk{is=Is,last=Last}, R) ->
+ {Xs0,Used0} = reserve_terminator(L, Last, Blocks, R),
+ reserve_xregs_is(reverse(Is), R, Xs0, Used0)
+ end,
+ beam_ssa:fold_po(F, Res, Blocks).
+
+reserve_xregs_is([#b_set{op=Op,dst=#b_var{name=Dst},args=Args}=I|Is], Res0, Xs0, Used0) ->
+ Xs1 = case is_gc_safe(I) of
+ true ->
+ Xs0;
+ false ->
+ %% There may be a garbage collection after executing this
+ %% instruction. We will need prune the list of preferred
+ %% X registers.
+ res_xregs_prune(Xs0, Used0, Res0)
+ end,
+ Res = reserve_xreg(Dst, Xs1, Res0),
+ Used1 = ordsets:union(Used0, beam_ssa:used(I)),
+ Used = ordsets:del_element(Dst, Used1),
+ case Op of
+ call ->
+ Xs = reserve_call_args(tl(Args)),
+ reserve_xregs_is(Is, Res, Xs, Used);
+ make_fun ->
+ Xs = reserve_call_args(tl(Args)),
+ reserve_xregs_is(Is, Res, Xs, Used);
+ _ ->
+ reserve_xregs_is(Is, Res, Xs1, Used)
+ end;
+reserve_xregs_is([], Res, _Xs, _Used) -> Res.
+
+reserve_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}, Blocks, Res) ->
+ case maps:get(Succ, Blocks) of
+ #b_blk{is=[],last=Last} ->
+ reserve_terminator(Succ, Last, Blocks, Res);
+ #b_blk{is=[_|_]=Is} ->
+ {res_xregs_from_phi(Is, L, Res, #{}),[]}
+ end;
+reserve_terminator(_, Last, _, _) ->
+ {#{},beam_ssa:used(Last)}.
+
+res_xregs_from_phi([#b_set{op=phi,dst=#b_var{name=Dst},args=Args}|Is],
+ Pred, Res, Acc) ->
+ case [V || {#b_var{name=V},L} <- Args, L =:= Pred] of
+ [] ->
+ res_xregs_from_phi(Is, Pred, Res, Acc);
+ [V] ->
+ case Res of
+ #{Dst:={prefer,Reg}} ->
+ res_xregs_from_phi(Is, Pred, Res, Acc#{V=>Reg});
+ #{Dst:=_} ->
+ res_xregs_from_phi(Is, Pred, Res, Acc)
+ end
+ end;
+res_xregs_from_phi(_, _, _, Acc) -> Acc.
+
+reserve_call_args(Args) ->
+ reserve_call_args(Args, 0, #{}).
+
+reserve_call_args([#b_var{name=Name}|As], X, Xs) ->
+ reserve_call_args(As, X+1, Xs#{Name=>{x,X}});
+reserve_call_args([#b_literal{}|As], X, Xs) ->
+ reserve_call_args(As, X+1, Xs);
+reserve_call_args([], _, Xs) -> Xs.
+
+reserve_xreg(V, Xs, Res) ->
+ case Res of
+ #{V:=_} ->
+ %% Already reserved.
+ Res;
+ #{} ->
+ case Xs of
+ #{V:=X} ->
+ %% Add a hint that a specific X register is
+ %% preferred, unless it is already in use.
+ Res#{V=>{prefer,X}};
+ #{} ->
+ %% Reserve as an X register in general.
+ Res#{V=>x}
+ end
+ end.
+
+is_gc_safe(#b_set{op=phi}) ->
+ false;
+is_gc_safe(#b_set{op=Op,args=Args}) ->
+ case beam_ssa_codegen:classify_heap_need(Op, Args) of
+ neutral -> true;
+ {put,_} -> true;
+ _ -> false
+ end.
+
+%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs.
+%% Prune the list of preferred to only include X registers that
+%% are guaranteed to survice a garbage collection.
+
+res_xregs_prune(Xs, Used, Res) ->
+ %% The number of safe registers is the number of the X registers
+ %% used after this point. The actual number of safe registers may
+ %% be highter than this number, but this is a conservative safe
+ %% estimate.
+ NumSafe = foldl(fun(V, N) ->
+ case Res of
+ #{V:={x,_}} -> N + 1;
+ #{V:=_} -> N;
+ #{} -> N + 1
+ end
+ end, 0, Used),
+
+ %% Remove unsafe registers from the list of potential
+ %% preferred registers.
+ maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs).
+
+%%%
+%%% Remove unsuitable aliases.
+%%%
+%%% If a binary is matched more than once, we must not put the
+%%% the match context in the same register as the binary to
+%%% avoid the following situation:
+%%%
+%%% {test,bs_start_match2,{f,3},1,[{x,0},0],{x,0}}.
+%%% .
+%%% .
+%%% .
+%%% {test,bs_start_match2,{f,6},1,[{x,0},0],{x,1}}. %% ILLEGAL!
+%%%
+%%% The second instruction is illegal because a match context source
+%%% is only allowed if source and destination registers are identical.
+%%%
+
+remove_unsuitable_aliases(#st{aliases=[_|_]=Aliases0,ssa=Blocks}=St) ->
+ R = rem_unsuitable(maps:values(Blocks)),
+ Unsuitable0 = [V || {V,[_,_|_]} <- rel2fam(R)],
+ Unsuitable = gb_sets:from_list(Unsuitable0),
+ Aliases =[P || {_,V}=P <- Aliases0,
+ not gb_sets:is_member(V, Unsuitable)],
+ St#st{aliases=Aliases};
+remove_unsuitable_aliases(#st{aliases=[]}=St) -> St.
+
+rem_unsuitable([#b_blk{is=Is}|Bs]) ->
+ Vs = [{V,Dst} ||
+ #b_set{op=bs_start_match,dst=#b_var{name=Dst},
+ args=[#b_var{name=V}]} <- Is],
+ Vs ++ rem_unsuitable(Bs);
+rem_unsuitable([]) -> [].
+
+%%%
+%%% Merge intervals.
+%%%
+
+merge_intervals(#st{aliases=Aliases0,intervals=Intervals0,
+ res=Reserved}=St) ->
+ Aliases1 = [A || A <- Aliases0,
+ is_suitable_alias(A, Reserved)],
+ case Aliases1 of
+ [] ->
+ St#st{aliases=Aliases1};
+ [_|_] ->
+ Intervals1 = maps:from_list(Intervals0),
+ {Intervals,Aliases} =
+ merge_intervals_1(Aliases1, Intervals1, []),
+ St#st{aliases=Aliases,intervals=Intervals}
+ end.
+
+merge_intervals_1([{Alias,V}|Vs], Intervals0, Acc) ->
+ #{Alias:=Int1,V:=Int2} = Intervals0,
+ Int3 = lists:merge(Int1, Int2),
+ Int = merge_intervals_2(Int3),
+ Intervals1 = maps:remove(Alias, Intervals0),
+ Intervals = Intervals1#{V:=Int},
+ merge_intervals_1(Vs, Intervals, [{Alias,V}|Acc]);
+merge_intervals_1([], Intervals, Acc) ->
+ {maps:to_list(Intervals),Acc}.
+
+merge_intervals_2([{A1,B1},{A2,B2}|Is]) when A2 =< B1 ->
+ merge_intervals_2([{min(A1, A2),max(B1, B2)}|Is]);
+merge_intervals_2([{_A1,B1}=R|[{A2,_B2}|_]=Is]) when B1 < A2 ->
+ [R|merge_intervals_2(Is)];
+merge_intervals_2([_]=Is) -> Is.
+
+is_suitable_alias({V1,V2}, Reserved) ->
+ #{V1:=Res1,V2:=Res2} = Reserved,
+ case {Res1,Res2} of
+ {x,x} -> true;
+ {x,{x,_}} -> true;
+ {{x,_},x} -> true;
+ {_,_} -> false
+ end.
+
+%%%
+%%% Register allocation using linear scan.
+%%%
+
+-record(i,
+ {sort=1 :: instr_number(),
+ reg=none :: i_reg(),
+ pool=x :: pool_id(),
+ var=#b_var{} :: b_var(),
+ rs=[] :: [range()]
+ }).
+
+-record(l,
+ {cur=#i{} :: interval(),
+ unhandled_res=[] :: [interval()],
+ unhandled_any=[] :: [interval()],
+ active=[] :: [interval()],
+ inactive=[] :: [interval()],
+ free=#{} :: #{var_name()=>pool(),
+ {'next',pool_id()}:=reg_num()},
+ regs=[] :: [{b_var(),ssa_register()}]
+ }).
+
+-type interval() :: #i{}.
+-type i_reg() :: ssa_register() | {'prefer',xreg()} | 'none'.
+-type pool_id() :: 'fr' | 'x' | 'z' | instr_number().
+-type pool() :: ordsets:ordset(ssa_register()).
+
+linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
+ St = St0#st{intervals=[],res=[]},
+ Free = init_free(maps:to_list(Res)),
+ Intervals1 = [init_interval(Int, Res) || Int <- Intervals0],
+ Intervals = sort(Intervals1),
+ IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end,
+ {UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
+ L = #l{unhandled_res=UnhandledRes,
+ unhandled_any=Unhandled,free=Free},
+ #l{regs=Regs} = do_linear(L),
+ St#st{regs=maps:from_list(Regs)}.
+
+init_interval({V,[{Start,_}|_]=Rs}, Res) ->
+ Info = maps:get(V, Res),
+ Pool = case Info of
+ {prefer,{x,_}} -> x;
+ x -> x;
+ {x,_} -> x;
+ {y,Uniq} -> Uniq;
+ {{y,_},Uniq} -> Uniq;
+ z -> z;
+ fr -> fr
+ end,
+ Reg = case Info of
+ {prefer,{x,_}} -> Info;
+ {x,_} -> Info;
+ {{y,_}=Y,_} -> Y;
+ _ -> none
+ end,
+ #i{sort=Start,var=V,reg=Reg,pool=Pool,rs=Rs}.
+
+init_free(Res) ->
+ Free0 = rel2fam([{x,{x,0}}|init_free_1(Res)]),
+ #{x:=Xs0} = Free1 = maps:from_list(Free0),
+ Xs = init_xregs(Xs0),
+ Free = Free1#{x:=Xs},
+ Next = maps:fold(fun(K, V, A) -> [{{next,K},length(V)}|A] end, [], Free),
+ maps:merge(Free, maps:from_list(Next)).
+
+init_free_1([{_,{prefer,{x,_}=Reg}}|Res]) ->
+ [{x,Reg}|init_free_1(Res)];
+init_free_1([{_,{x,_}=Reg}|Res]) ->
+ [{x,Reg}|init_free_1(Res)];
+init_free_1([{_,{y,Uniq}}|Res]) ->
+ [{Uniq,{y,0}}|init_free_1(Res)];
+init_free_1([{_,{{y,_}=Reg,Uniq}}|Res]) ->
+ [{Uniq,Reg}|init_free_1(Res)];
+init_free_1([{_,z}|Res]) ->
+ [{z,{z,0}}|init_free_1(Res)];
+init_free_1([{_,fr}|Res]) ->
+ [{fr,{fr,0}}|init_free_1(Res)];
+init_free_1([{_,x}|Res]) ->
+ init_free_1(Res);
+init_free_1([]) -> [].
+
+%% Make sure that the pool of xregs is contiguous.
+init_xregs([{x,N},{x,M}|Is]) when N+1 =:= M ->
+ [{x,N}|init_xregs([{x,M}|Is])];
+init_xregs([{x,N}|[{x,_}|_]=Is]) ->
+ [{x,N}|init_xregs([{x,N+1}|Is])];
+init_xregs([{x,_}]=Is) -> Is.
+
+do_linear(L0) ->
+ case set_next_current(L0) of
+ done ->
+ L0;
+ L1 ->
+ L2 = expire_active(L1),
+ L3 = check_inactive(L2),
+ Available = collect_available(L3),
+ L4 = select_register(Available, L3),
+ L = make_cur_active(L4),
+ do_linear(L)
+ end.
+
+set_next_current(#l{unhandled_res=[Cur1|T1],
+ unhandled_any=[Cur2|T2]}=L) ->
+ case {Cur1,Cur2} of
+ {#i{sort=N1},#i{sort=N2}} when N1 < N2 ->
+ L#l{cur=Cur1,unhandled_res=T1};
+ {_,_} ->
+ L#l{cur=Cur2,unhandled_any=T2}
+ end;
+set_next_current(#l{unhandled_res=[],
+ unhandled_any=[Cur|T]}=L) ->
+ L#l{cur=Cur,unhandled_any=T};
+set_next_current(#l{unhandled_res=[Cur|T],
+ unhandled_any=[]}=L) ->
+ L#l{cur=Cur,unhandled_res=T};
+set_next_current(#l{unhandled_res=[],unhandled_any=[]}) ->
+ done.
+
+expire_active(#l{cur=#i{sort=CurBegin},active=Act0}=L0) ->
+ {Act,L} = expire_active(Act0, CurBegin, L0, []),
+ L#l{active=Act}.
+
+expire_active([#i{reg=Reg,rs=Rs0}=I|Is], CurBegin, L0, Acc) ->
+ {_,_} = Reg, %Assertion.
+ case overlap_status(Rs0, CurBegin) of
+ ends_before_cur ->
+ L = free_reg(I, L0),
+ expire_active(Is, CurBegin, L, Acc);
+ overlapping ->
+ expire_active(Is, CurBegin, L0, [I|Acc]);
+ not_overlapping ->
+ Rs = strip_before_current(Rs0, CurBegin),
+ L1 = free_reg(I, L0),
+ L = L1#l{inactive=[I#i{rs=Rs}|L1#l.inactive]},
+ expire_active(Is, CurBegin, L, Acc)
+ end;
+expire_active([], _CurBegin, L, Acc) ->
+ {Acc,L}.
+
+check_inactive(#l{cur=#i{sort=CurBegin},inactive=InAct0}=L0) ->
+ {InAct,L} = check_inactive(InAct0, CurBegin, L0, []),
+ L#l{inactive=InAct}.
+
+check_inactive([#i{rs=Rs0}=I|Is], CurBegin, L0, Acc) ->
+ case overlap_status(Rs0, CurBegin) of
+ ends_before_cur ->
+ check_inactive(Is, CurBegin, L0, Acc);
+ not_overlapping ->
+ check_inactive(Is, CurBegin, L0, [I|Acc]);
+ overlapping ->
+ Rs = strip_before_current(Rs0, CurBegin),
+ L1 = L0#l{active=[I#i{rs=Rs}|L0#l.active]},
+ L = reserve_reg(I, L1),
+ check_inactive(Is, CurBegin, L, Acc)
+ end;
+check_inactive([], _CurBegin, L, Acc) ->
+ {Acc,L}.
+
+strip_before_current([{_,E}|Rs], CurBegin) when E =< CurBegin ->
+ strip_before_current(Rs, CurBegin);
+strip_before_current(Rs, _CurBegin) -> Rs.
+
+collect_available(#l{cur=#i{reg={prefer,{_,_}=Prefer}}=I}=L) ->
+ %% Use the preferred register if it is available.
+ Avail = collect_available(L#l{cur=I#i{reg=none}}),
+ case member(Prefer, Avail) of
+ true -> [Prefer];
+ false -> Avail
+ end;
+collect_available(#l{cur=#i{reg={_,_}=ReservedReg}}) ->
+ %% Return the already reserved register.
+ [ReservedReg];
+collect_available(#l{unhandled_res=Unhandled,cur=Cur}=L) ->
+ Free = get_pool(Cur, L),
+
+ %% Note that since the live intervals are constructed from
+ %% SSA form, there cannot be any overlap of the current interval
+ %% with any inactive interval. See [3], page 175. Therefore we
+ %% only have check the unhandled intervals for overlap with
+ %% the current interval. As a further optimization, we only need
+ %% to check the intervals that have reserved registers.
+ collect_available(Unhandled, Cur, Free).
+
+collect_available([#i{pool=Pool1}|Is], #i{pool=Pool2}=Cur, Free)
+ when Pool1 =/= Pool2 ->
+ %% Wrong pool. Ignore this interval.
+ collect_available(Is, Cur, Free);
+collect_available([#i{reg={_,_}=Reg}=I|Is], Cur, Free0) ->
+ case overlaps(I, Cur) of
+ true ->
+ Free = ordsets:del_element(Reg, Free0),
+ collect_available(Is, Cur, Free);
+ false ->
+ collect_available(Is, Cur, Free0)
+ end;
+collect_available([], _, Free) -> Free.
+
+select_register([{_,_}=Reg|_], #l{cur=Cur0,regs=Regs}=L) ->
+ Cur = Cur0#i{reg=Reg},
+ reserve_reg(Cur, L#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]});
+select_register([], #l{cur=Cur0,regs=Regs}=L0) ->
+ %% Allocate a new register in the pool.
+ {Reg,L1} = get_next_free(Cur0, L0),
+ Cur = Cur0#i{reg=Reg},
+ L = L1#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]},
+ reserve_reg(Cur, L).
+
+make_cur_active(#l{cur=Cur,active=Act}=L) ->
+ L#l{active=[Cur|Act]}.
+
+overlaps(#i{rs=Rs1}, #i{rs=Rs2}) ->
+ are_overlapping(Rs1, Rs2).
+
+overlap_status([{S,E}], CurBegin) ->
+ if
+ E =< CurBegin -> ends_before_cur;
+ CurBegin < S -> not_overlapping;
+ true -> overlapping
+ end;
+overlap_status([{S,E}|Rs], CurBegin) ->
+ if
+ E =< CurBegin ->
+ overlap_status(Rs, CurBegin);
+ S =< CurBegin ->
+ overlapping;
+ true ->
+ not_overlapping
+ end.
+
+reserve_reg(#i{reg={_,_}=Reg}=I, L) ->
+ FreeRegs0 = get_pool(I, L),
+ FreeRegs = ordsets:del_element(Reg, FreeRegs0),
+ update_pool(I, FreeRegs, L).
+
+free_reg(#i{reg={_,_}=Reg}=I, L) ->
+ FreeRegs0 = get_pool(I, L),
+ FreeRegs = ordsets:add_element(Reg, FreeRegs0),
+ update_pool(I, FreeRegs, L).
+
+get_pool(#i{pool=Pool}, #l{free=Free}) ->
+ maps:get(Pool, Free).
+
+update_pool(#i{pool=Pool}, New, #l{free=Free0}=L) ->
+ Free = maps:put(Pool, New, Free0),
+ L#l{free=Free}.
+
+get_next_free(#i{pool=Pool}, #l{free=Free0}=L0) ->
+ K = {next,Pool},
+ N = maps:get(K, Free0),
+ Free = maps:put(K, N+1, Free0),
+ L = L0#l{free=Free},
+ if
+ is_integer(Pool) -> {{y,N},L};
+ is_atom(Pool) -> {{Pool,N},L}
+ end.
+
+%%%
+%%% Interval utilities.
+%%%
+
+are_overlapping([R|Rs1], Rs2) ->
+ case are_overlapping_1(R, Rs2) of
+ true ->
+ true;
+ false ->
+ are_overlapping(Rs1, Rs2)
+ end;
+are_overlapping([], _) -> false.
+
+are_overlapping_1({_S1,E1}, [{S2,_E2}|_]) when E1 < S2 ->
+ false;
+are_overlapping_1({S1,E1}=R, [{S2,E2}|Rs]) ->
+ (S2 < E1 andalso E2 > S1) orelse are_overlapping_1(R, Rs);
+are_overlapping_1({_,_}, []) -> false.
+
+%%%
+%%% Utilities.
+%%%
+
+%% is_loop_header(L, Blocks) -> false|true.
+%% Check whether the block is a loop header.
+
+is_loop_header(L, Blocks) ->
+ %% We KNOW that a loop header must start with a peek_message
+ %% instruction.
+ case maps:get(L, Blocks) of
+ #b_blk{is=[#b_set{op=peek_message}|_]} -> true;
+ _ -> false
+ end.
+
+rel2fam(S0) ->
+ S1 = sofs:relation(S0),
+ S = sofs:rel2fam(S1),
+ sofs:to_external(S).
+
+split_phis(Is) ->
+ partition(fun(#b_set{op=Op}) -> Op =:= phi end, Is).
+
+is_yreg({y,_}) -> true;
+is_yreg({x,_}) -> false;
+is_yreg({z,_}) -> false;
+is_yreg({fr,_}) -> false.
+
+new_var_names([V0|Vs0], Count0) ->
+ {V,Count1} = new_var_name(V0, Count0),
+ {Vs,Count} = new_var_names(Vs0, Count1),
+ {[V|Vs],Count};
+new_var_names([], Count) -> {[],Count}.
+
+new_var_name({Base,Int}, Count) ->
+ true = is_integer(Int), %Assertion.
+ {{Base,Count},Count+1};
+new_var_name(Base, Count) ->
+ {{Base,Count},Count+1}.
diff --git a/lib/compiler/src/beam_ssa_recv.erl b/lib/compiler/src/beam_ssa_recv.erl
new file mode 100644
index 0000000000..82fe006487
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_recv.erl
@@ -0,0 +1,281 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(beam_ssa_recv).
+-export([module/2]).
+
+%%%
+%%% In code such as:
+%%%
+%%% Ref = make_ref(), %Or erlang:monitor(process, Pid)
+%%% .
+%%% .
+%%% .
+%%% receive
+%%% {Ref,Reply} -> Reply
+%%% end.
+%%%
+%%% we know that none of the messages that exist in the message queue
+%%% before the call to make_ref/0 can be matched out in the receive
+%%% statement. Therefore we can avoid going through the entire message
+%%% queue if we introduce two new instructions (here written as
+%%% BIFs in pseudo-Erlang):
+%%%
+%%% recv_mark(SomeUniqInteger),
+%%% Ref = make_ref(),
+%%% .
+%%% .
+%%% .
+%%% recv_set(SomeUniqInteger),
+%%% receive
+%%% {Ref,Reply} -> Reply
+%%% end.
+%%%
+%%% The recv_mark/1 instruction will save the current position and
+%%% SomeUniqInteger in the process context. The recv_set
+%%% instruction will verify that SomeUniqInteger is still stored
+%%% in the process context. If it is, it will set the current pointer
+%%% for the message queue (the next message to be read out) to the
+%%% position that was saved by recv_mark/1.
+%%%
+%%% The remove_message instruction must be modified to invalidate
+%%% the information stored by the previous recv_mark/1, in case there
+%%% is another receive executed between the calls to recv_mark/1 and
+%%% recv_set/1.
+%%%
+%%% We use a reference to a label (i.e. a position in the loaded code)
+%%% as the SomeUniqInteger.
+%%%
+
+-include("beam_ssa.hrl").
+-import(lists, [all/2,reverse/2]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+ {'ok',beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, _Opts) ->
+ Fs = [function(F) || F <- Fs0],
+ {ok,Module#b_module{body=Fs}}.
+
+%%%
+%%% Local functions.
+%%%
+
+function(#b_function{anno=Anno,bs=Blocks0}=F) ->
+ try
+ Blocks = opt(Blocks0),
+ F#b_function{bs=Blocks}
+ catch
+ Class:Error:Stack ->
+ #{func_info:={_,Name,Arity}} = Anno,
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
+
+opt(Blocks) ->
+ Linear = beam_ssa:linearize(Blocks),
+ opt(Linear, Blocks, []).
+
+opt([{L,#b_blk{is=[#b_set{op=peek_message}|_]}=Blk0}|Bs], Blocks0, Preds) ->
+ %% Search for a suitable reference creating call in one of the predecessor
+ %% blocks. Whether we find such a call or not, we always clear the
+ %% the list of predecessors to ensure that any nested receive can't
+ %% search above the current receive.
+ case recv_opt(Preds, L, Blocks0) of
+ {yes,Blocks1} ->
+ Blk = beam_ssa:add_anno(recv_set, L, Blk0),
+ Blocks = maps:put(L, Blk, Blocks1),
+ opt(Bs, Blocks, []);
+ no ->
+ opt(Bs, Blocks0, [])
+ end;
+opt([{L,_}|Bs], Blocks, Preds) ->
+ opt(Bs, Blocks, [L|Preds]);
+opt([], Blocks, _) -> Blocks.
+
+recv_opt([L|Ls], RecvLbl, Blocks) ->
+ #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks),
+ case recv_opt_is(Is0, RecvLbl, Blocks, []) of
+ {yes,Is} ->
+ Blk = Blk0#b_blk{is=Is},
+ {yes,maps:put(L, Blk, Blocks)};
+ no ->
+ recv_opt(Ls, RecvLbl, Blocks)
+ end;
+recv_opt([], _, _Blocks) -> no.
+
+recv_opt_is([#b_set{op=call}=I0|Is], RecvLbl, Blocks0, Acc) ->
+ case makes_ref(I0, Blocks0) of
+ no ->
+ recv_opt_is(Is, RecvLbl, Blocks0, [I0|Acc]);
+ {yes,Ref} ->
+ case opt_ref_used(RecvLbl, Ref, Blocks0) of
+ false ->
+ recv_opt_is(Is, RecvLbl, Blocks0, [I0|Acc]);
+ true ->
+ I = beam_ssa:add_anno(recv_mark, RecvLbl, I0),
+ {yes,reverse(Acc, [I|Is])}
+ end
+ end;
+recv_opt_is([I|Is], RecvLbl, Blocks, Acc) ->
+ recv_opt_is(Is, RecvLbl, Blocks, [I|Acc]);
+recv_opt_is([], _, _, _) -> no.
+
+makes_ref(#b_set{dst=#b_var{name=Dst},args=[Func0|_]}, Blocks) ->
+ Func = case Func0 of
+ #b_remote{mod=#b_literal{val=erlang},
+ name=#b_literal{val=Name},arity=A0} ->
+ {Name,A0};
+ _ ->
+ none
+ end,
+ case Func of
+ {make_ref,0} ->
+ {yes,Dst};
+ {monitor,2} ->
+ {yes,Dst};
+ {spawn_monitor,A} when A =:= 1; A =:= 3 ->
+ ref_in_tuple(Dst, Blocks);
+ _ ->
+ no
+ end.
+
+ref_in_tuple(Tuple, Blocks) ->
+ F = fun(#b_set{op=get_tuple_element,dst=#b_var{name=Ref},
+ args=[#b_var{name=Tup},#b_literal{val=1}]}, no)
+ when Tup =:= Tuple -> {yes,Ref};
+ (_, A) -> A
+ end,
+ beam_ssa:fold_instrs_rpo(F, [0], no, Blocks).
+
+opt_ref_used(RecvLbl, Ref, Blocks) ->
+ Vs = #{{var,Ref}=>ref,ref=>Ref,ref_matched=>false},
+ case opt_ref_used_1(RecvLbl, Vs, Blocks) of
+ used -> true;
+ not_used -> false;
+ done -> false
+ end.
+
+opt_ref_used_1(L, Vs0, Blocks) ->
+ #b_blk{is=Is} = Blk = maps:get(L, Blocks),
+ case opt_ref_used_is(Is, Vs0) of
+ #{}=Vs ->
+ opt_ref_used_last(Blk, Vs, Blocks);
+ Result ->
+ Result
+ end.
+
+opt_ref_used_is([#b_set{op=peek_message,dst=#b_var{name=M}}|Is], Vs0) ->
+ Vs = Vs0#{{var,M}=>message},
+ opt_ref_used_is(Is, Vs);
+opt_ref_used_is([#b_set{op={bif,Bif},args=Args,dst=#b_var{name=B}}=I|Is],
+ Vs0) ->
+ S = case Bif of
+ '=:=' -> true;
+ '==' -> true;
+ _ -> none
+ end,
+ case S of
+ none ->
+ Vs = update_vars(I, Vs0),
+ opt_ref_used_is(Is, Vs);
+ Bool when is_boolean(Bool) ->
+ case is_ref_msg_comparison(Args, Vs0) of
+ true ->
+ Vs = Vs0#{B=>{is_ref,Bool}},
+ opt_ref_used_is(Is, Vs);
+ false ->
+ opt_ref_used_is(Is, Vs0)
+ end
+ end;
+opt_ref_used_is([#b_set{op=remove_message}|_], Vs) ->
+ case Vs of
+ #{ref_matched:=true} ->
+ used;
+ #{ref_matched:=false} ->
+ not_used
+ end;
+opt_ref_used_is([#b_set{op=recv_next}|_], _Vs) ->
+ done;
+opt_ref_used_is([#b_set{op=wait_timeout}|_], _Vs) ->
+ done;
+opt_ref_used_is([#b_set{op=wait}|_], _Vs) ->
+ done;
+opt_ref_used_is([#b_set{}=I|Is], Vs0) ->
+ Vs = update_vars(I, Vs0),
+ opt_ref_used_is(Is, Vs);
+opt_ref_used_is([], Vs) -> Vs.
+
+opt_ref_used_last(#b_blk{last=Last}=Blk, Vs, Blocks) ->
+ case Last of
+ #b_br{bool=#b_var{name=Bool},succ=Succ,fail=Fail} ->
+ case Vs of
+ #{Bool:={is_ref,Matched}} ->
+ ref_used_in([{Succ,Vs#{ref_matched:=Matched}},
+ {Fail,Vs#{ref_matched:=not Matched}}],
+ Blocks);
+ #{} ->
+ ref_used_in([{Succ,Vs},{Fail,Vs}], Blocks)
+ end;
+ _ ->
+ SuccVs = [{Succ,Vs} || Succ <- beam_ssa:successors(Blk)],
+ ref_used_in(SuccVs, Blocks)
+ end.
+
+ref_used_in([{L,Vs0}|Ls], Blocks) ->
+ case opt_ref_used_1(L, Vs0, Blocks) of
+ not_used ->
+ not_used;
+ used ->
+ case ref_used_in(Ls, Blocks) of
+ done -> used;
+ Result -> Result
+ end;
+ done -> ref_used_in(Ls, Blocks)
+ end;
+ref_used_in([], _) -> done.
+
+update_vars(#b_set{args=Args,dst=#b_var{name=B}}, Vs) ->
+ Vars = [V || #b_var{name=V} <- Args],
+ All = all(fun(V) ->
+ Var = {var,V},
+ case Vs of
+ #{Var:=message} -> true;
+ #{} -> false
+ end
+ end, Vars),
+ case All of
+ true -> Vs#{{var,B}=>message};
+ false -> Vs
+ end.
+
+%% is_ref_msg_comparison(Args, Variables) -> true|false.
+%% Return 'true' if Args denotes a comparison between the
+%% reference and message or part of the message.
+
+is_ref_msg_comparison([#b_var{name=A1},#b_var{name=A2}], Vs) ->
+ V1 = {var,A1},
+ V2 = {var,A2},
+ case Vs of
+ #{V1:=ref,V2:=message} -> true;
+ #{V1:=message,V2:=ref} -> true;
+ #{} -> false
+ end;
+is_ref_msg_comparison(_, _) -> false.
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
new file mode 100644
index 0000000000..e5f15da836
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_type.erl
@@ -0,0 +1,1106 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(beam_ssa_type).
+-export([opt/2]).
+
+-include("beam_ssa.hrl").
+-import(lists, [any/2,droplast/1,foldl/3,last/1,member/2,
+ reverse/1,search/2,sort/1]).
+
+-define(UNICODE_INT, #t_integer{elements={0,16#10FFFF}}).
+
+-record(d, {ds :: #{beam_ssa:var_name():=beam_ssa:b_set()},
+ ls :: #{beam_ssa:label():=type_db()}}).
+
+-define(ATOM_SET_SIZE, 5).
+
+%% Records that represent type information.
+-record(t_atom, {elements=any :: 'any' | [atom()]}).
+-record(t_integer, {elements=any :: 'any' | {integer(),integer()}}).
+-record(t_bs_match, {type :: type()}).
+-record(t_tuple, {size=0 :: integer(),
+ exact=false :: boolean(),
+ elements=[] :: [any()]
+ }).
+
+-type type() :: 'any' | 'none' |
+ #t_atom{} | #t_integer{} | #t_bs_match{} | #t_tuple{} |
+ {'binary',pos_integer()} | 'cons' | 'float' | 'list' | 'map' | 'nil' |'number'.
+-type type_db() :: #{beam_ssa:var_name():=type()}.
+
+-spec opt([{Label0,Block0}], Args) -> [{Label,Block}] when
+ Label0 :: beam_ssa:label(),
+ Block0 :: beam_ssa:b_blk(),
+ Args :: [beam_ssa:b_var()],
+ Label :: beam_ssa:label(),
+ Block :: beam_ssa:b_blk().
+
+opt(Linear, Args) ->
+ Ts = maps:from_list([{V,any} || #b_var{name=V} <- Args]),
+ FakeCall = #b_set{op=call,args=[#b_remote{mod=#b_literal{val=unknown},
+ name=#b_literal{val=unknown},
+ arity=0}]},
+ Defs = maps:from_list([{V,FakeCall#b_set{dst=Var}} ||
+ #b_var{name=V}=Var <- Args]),
+ D = #d{ds=Defs,ls=#{0=>Ts}},
+ opt_1(Linear, D).
+
+opt_1([{L,Blk}|Bs], #d{ls=Ls}=D) ->
+ case Ls of
+ #{L:=Ts} ->
+ opt_2(L, Blk, Bs, Ts, D);
+ #{} ->
+ %% This block is never reached. Discard it.
+ opt_1(Bs, D)
+ end;
+opt_1([], _) -> [].
+
+opt_2(L, #b_blk{is=Is0}=Blk0, Bs, Ts, D0) ->
+ case Is0 of
+ [#b_set{op=call,dst=Dst,
+ args=[#b_remote{mod=#b_literal{val=Mod},
+ name=#b_literal{val=Name}}=Rem|Args0]}=I0] ->
+ case erl_bifs:is_exit_bif(Mod, Name, length(Args0)) of
+ true ->
+ %% This call will never reach the successor block.
+ %% Rewrite the terminator to a 'ret', and remove
+ %% all type information for this label. That will
+ %% simplify the phi node in the former successor.
+ Args = [simplify_arg(Arg, Ts) || Arg <- Args0],
+ I = I0#b_set{args=[Rem|Args]},
+ Ret = #b_ret{arg=Dst},
+ Blk = Blk0#b_blk{is=[I],last=Ret},
+ Ls = maps:remove(L, D0#d.ls),
+ D = D0#d{ls=Ls},
+ [{L,Blk}|opt_1(Bs, D)];
+ false ->
+ opt_3(L, Blk0, Bs, Ts, D0)
+ end;
+ _ ->
+ opt_3(L, Blk0, Bs, Ts, D0)
+ end.
+
+opt_3(L, #b_blk{is=Is0,last=Last0}=Blk0, Bs, Ts0, #d{ds=Ds0,ls=Ls0}=D0) ->
+ {Is,Ts,Ds} = opt_is(Is0, Ts0, Ds0, Ls0, []),
+ D1 = D0#d{ds=Ds},
+ Last = opt_terminator(Last0, Ts, Ds),
+ D = update_successors(Last, Ts, D1),
+ Blk = Blk0#b_blk{is=Is,last=Last},
+ [{L,Blk}|opt_1(Bs, D)].
+
+opt_is([#b_set{op=phi,dst=#b_var{name=Dst},args=Args0}=I0|Is], Ts0, Ds0, Ls, Acc) ->
+ %% Simplify the phi node by removing all predecessor blocks that no
+ %% longer exists or no longer branches to this block.
+ Args = [P || {_,From}=P <- Args0, maps:is_key(From, Ls)],
+ I = I0#b_set{args=Args},
+ Ts = update_types(I, Ts0, Ds0),
+ Ds = Ds0#{Dst=>I},
+ opt_is(Is, Ts, Ds, Ls, [I|Acc]);
+opt_is([#b_set{dst=#b_var{name=Dst}}=I0|Is], Ts0, Ds0, Ls, Acc) ->
+ I = simplify(I0, Ts0),
+ Ts = update_types(I, Ts0, Ds0),
+ Ds = Ds0#{Dst=>I},
+ opt_is(Is, Ts, Ds, Ls, [I|Acc]);
+opt_is([], Ts, Ds, _Ls, Acc) ->
+ {reverse(Acc),Ts,Ds}.
+
+simplify(#b_set{op={bif,element},args=[#b_literal{val=Index},Tuple]}=I, Ts) ->
+ case t_tuple_size(get_type(Tuple, Ts)) of
+ {_,Size} when is_integer(Index), 1 =< Index, Index =< Size ->
+ I#b_set{op=get_tuple_element,args=[Tuple,#b_literal{val=Index-1}]};
+ _ ->
+ I
+ end;
+simplify(#b_set{op={bif,hd},args=[List]}=I, Ts) ->
+ case get_type(List, Ts) of
+ cons ->
+ I#b_set{op=get_hd};
+ _ ->
+ I
+ end;
+simplify(#b_set{op={bif,tl},args=[List]}=I, Ts) ->
+ case get_type(List, Ts) of
+ cons ->
+ I#b_set{op=get_tl};
+ _ ->
+ I
+ end;
+simplify(#b_set{op={bif,size},args=[Term]}=I, Ts) ->
+ case get_type(Term, Ts) of
+ #t_tuple{} ->
+ I#b_set{op={bif,tuple_size}};
+ _ ->
+ I
+ end;
+simplify(#b_set{op={bif,'=='},args=Args0}=I0, Ts) ->
+ Args = [simplify_arg(Arg, Ts) || Arg <- Args0],
+ I = I0#b_set{args=Args},
+ Types = get_types(Args, Ts),
+ EqEq = case {meet(Types),join(Types)} of
+ {none,any} -> true;
+ {#t_integer{},#t_integer{}} -> true;
+ {float,float} -> true;
+ {{binary,_},_} -> true;
+ {#t_atom{},_} -> true;
+ {_,_} -> false
+ end,
+ case EqEq of
+ true ->
+ I#b_set{op={bif,'=:='}};
+ false ->
+ I
+ end;
+simplify(#b_set{op={bif,Op},args=Args0}=I0, Ts) ->
+ Args = [simplify_arg(Arg, Ts) || Arg <- Args0],
+ I = I0#b_set{args=Args},
+ Types = get_types(Args, Ts),
+ case is_float_op(Op, Types) of
+ false ->
+ I;
+ true ->
+ AnnoArgs = [anno_float_arg(A) || A <- Types],
+ beam_ssa:add_anno(float_op, AnnoArgs, I)
+ end;
+simplify(#b_set{op=wait_timeout,args=[Timeout0]}=I, Ts) ->
+ case simplify_arg(Timeout0, Ts) of
+ #b_literal{val=infinity} ->
+ I#b_set{op=wait,args=[]};
+ Timeout ->
+ I#b_set{args=[Timeout]}
+ end;
+simplify(#b_set{op=Op,args=Args0}=I, Ts) ->
+ Safe = case Op of
+ call -> true;
+ put_list -> true;
+ put_tuple -> true;
+ _ -> false
+ end,
+ case Safe of
+ true ->
+ Args = [simplify_arg(Arg, Ts) || Arg <- Args0],
+ I#b_set{args=Args};
+ false ->
+ I
+ end.
+
+simplify_arg(#b_var{}=Arg, Ts) ->
+ Type = get_type(Arg, Ts),
+ case get_literal_from_type(Type) of
+ none -> Arg;
+ #b_literal{}=Lit -> Lit
+ end;
+simplify_arg(Arg, _Ts) -> Arg.
+
+is_float_op('-', [float]) ->
+ true;
+is_float_op('/', [_,_]) ->
+ true;
+is_float_op(Op, [float,_Other]) ->
+ is_float_op_1(Op);
+is_float_op(Op, [_Other,float]) ->
+ is_float_op_1(Op);
+is_float_op(_, _) -> false.
+
+is_float_op_1('+') -> true;
+is_float_op_1('-') -> true;
+is_float_op_1('*') -> true;
+is_float_op_1(_) -> false.
+
+anno_float_arg(float) -> float;
+anno_float_arg(_) -> convert.
+
+opt_terminator(#b_br{bool=#b_literal{}}=Br, _Ts, _Ds) ->
+ Br;
+opt_terminator(#b_br{bool=#b_var{name=V}=Var}=Br, Ts, Ds) ->
+ BoolType = get_type(Var, Ts),
+ case get_literal_from_type(BoolType) of
+ #b_literal{}=BoolLit ->
+ Br#b_br{bool=BoolLit};
+ none ->
+ #{V:=Set} = Ds,
+ case Set of
+ #b_set{op={bif,'=:='},args=[Bool,#b_literal{val=true}]} ->
+ case t_is_boolean(get_type(Bool, Ts)) of
+ true ->
+ %% Bool =:= true ==> Bool
+ simplify_not(Br#b_br{bool=Bool}, Ts, Ds);
+ false ->
+ Br
+ end;
+ #b_set{} ->
+ simplify_not(Br, Ts, Ds)
+ end
+ end;
+opt_terminator(#b_switch{arg=#b_literal{val=Val0}=Arg,fail=Fail,list=List},
+ _Ts, _Ds) ->
+ {value,{_,L}} = search(fun({#b_literal{val=Val1},_}) ->
+ Val1 =:= Val0
+ end, List ++ [{Arg,Fail}]),
+ #b_br{bool=#b_literal{val=true},succ=L,fail=L};
+opt_terminator(#b_switch{arg=V}=Sw0, Ts, Ds) ->
+ case get_literal_from_type(Ts) of
+ #b_literal{}=Lit ->
+ Sw = Sw0#b_switch{arg=Lit},
+ opt_terminator(Sw, Ts, Ds);
+ none ->
+ case get_type(V, Ts) of
+ #t_integer{elements={_,_}=Range} ->
+ simplify_switch_int(Sw0, Range);
+ Type ->
+ case t_is_boolean(Type) of
+ true ->
+ case simplify_switch_bool(Sw0, Ts, Ds) of
+ #b_br{}=Br ->
+ opt_terminator(Br, Ts, Ds);
+ Sw ->
+ Sw
+ end;
+ false ->
+ Sw0
+ end
+ end
+ end;
+opt_terminator(#b_ret{}=Ret, _Ts, _Ds) ->
+ Ret.
+
+update_successors(#b_br{bool=#b_literal{val=false},fail=S}, Ts, D) ->
+ update_successor(S, Ts, D);
+update_successors(#b_br{bool=#b_literal{val=true},succ=S}, Ts, D) ->
+ update_successor(S, Ts, D);
+update_successors(#b_br{bool=#b_var{name=V},succ=Succ,fail=Fail}, Ts, D0) ->
+ D = update_successor(Fail, Ts#{V:=t_atom(false)}, D0),
+ SuccTs = infer_types(V, Ts, D0),
+ update_successor(Succ, SuccTs#{V:=t_atom(true)}, D);
+update_successors(#b_switch{arg=#b_var{name=V},fail=Fail,list=List}, Ts, D0) ->
+ D = update_successor(Fail, Ts, D0),
+ foldl(fun({Val,S}, A) ->
+ T = get_type(Val, Ts),
+ update_successor(S, Ts#{V=>T}, A)
+ end, D, List);
+update_successors(#b_ret{}, _Ts, D) -> D.
+
+update_successor(S, Ts0, #d{ls=Ls}=D) ->
+ case Ls of
+ #{S:=Ts1} ->
+ Ts = join_types(Ts0, Ts1),
+ D#d{ls=Ls#{S:=Ts}};
+ #{} ->
+ D#d{ls=Ls#{S=>Ts0}}
+ end.
+
+update_types(#b_set{op=Op,dst=#b_var{name=Dst},args=Args}, Ts, Ds) ->
+ T = type(Op, Args, Ts, Ds),
+ Ts#{Dst=>T}.
+
+type(phi, Args, Ts, _Ds) ->
+ Types = [get_type(A, Ts) || {A,_} <- Args],
+ join(Types);
+type({bif,'=:='}, [Same,Same], _Ts, _Ds) ->
+ t_atom(true);
+type({bif,'=:='}, [_,_]=Args, Ts, _Ds) ->
+ case get_literals(Args, Ts) of
+ [#b_literal{val=Lit1},#b_literal{val=Lit2}] ->
+ t_atom(Lit1 =:= Lit2);
+ [_,_] ->
+ case meet(get_types(Args, Ts)) of
+ none -> t_atom(false);
+ _ -> t_boolean()
+ end
+ end;
+type({bif,tuple_size}, [Src], Ts, _Ds) ->
+ case t_tuple_size(get_type(Src, Ts)) of
+ {exact,Size} ->
+ t_integer(Size);
+ _ ->
+ t_integer()
+ end;
+type({bif,'band'}, Args, Ts, _Ds) ->
+ band_type(Args, Ts);
+type({bif,Bif}, [Src]=Args, Ts, _Ds) ->
+ case get_type(Src, Ts) of
+ any ->
+ bif_type(Bif, Args);
+ Type ->
+ case will_succeed(Bif, Type) of
+ yes ->
+ t_atom(true);
+ no ->
+ t_atom(false);
+ maybe ->
+ bif_type(Bif, Args)
+ end
+ end;
+type({bif,Bif}, Args, Ts, _Ds) ->
+ case bif_type(Bif, Args) of
+ number ->
+ arith_op_type(Args, Ts);
+ Type ->
+ Type
+ end;
+type(bs_init, [#b_literal{val=Type}|Args], _Ts, _Ds) ->
+ case {Type,Args} of
+ {new,[_,#b_literal{val=Unit}]} ->
+ {binary,Unit};
+ {append,[_,_,#b_literal{val=Unit}]} ->
+ {binary,Unit};
+ {private_append,[_,_,#b_literal{val=Unit}]} ->
+ {binary,Unit}
+ end;
+type(bs_extract, [Ctx], Ts, _Ds) ->
+ #t_bs_match{type=Type} = get_type(Ctx, Ts),
+ Type;
+type(bs_match, Args, _Ts, _Ds) ->
+ #t_bs_match{type=bs_match_type(Args)};
+type(call, [#b_remote{mod=#b_literal{val=Mod},
+ name=#b_literal{val=Name}}|Args], Ts, _Ds) ->
+ case {Mod,Name,Args} of
+ {erlang,setelement,[Pos,Tuple,_]} ->
+ case {get_type(Pos, Ts),get_type(Tuple, Ts)} of
+ {#t_integer{elements={MinIndex,_}},#t_tuple{}=T}
+ when MinIndex > 1 ->
+ %% First element is not updated. The result
+ %% will have the same type.
+ T;
+ {_,#t_tuple{}=T} ->
+ %% Position is 1 or unknown. May update the first
+ %% element of the tuple.
+ T#t_tuple{elements=[]};
+ {#t_integer{elements={MinIndex,_}},_} ->
+ #t_tuple{size=MinIndex};
+ {_,_} ->
+ #t_tuple{}
+ end;
+ {math,_,_} ->
+ case is_math_bif(Name, length(Args)) of
+ false -> any;
+ true -> float
+ end;
+ {_,_,_} ->
+ case erl_bifs:is_exit_bif(Mod, Name, length(Args)) of
+ true -> none;
+ false -> any
+ end
+ end;
+type(get_tuple_element, [Tuple,#b_literal{val=0}], Ts, _Ds) ->
+ case get_type(Tuple, Ts) of
+ #t_tuple{elements=[First]} ->
+ get_type(#b_literal{val=First}, Ts);
+ #t_tuple{} ->
+ any
+ end;
+type(is_nonempty_list, [Src], Ts, _Ds) ->
+ case get_type(Src, Ts) of
+ any ->
+ t_boolean();
+ list ->
+ t_boolean();
+ cons ->
+ t_atom(true);
+ _ ->
+ t_atom(false)
+ end;
+type(is_tagged_tuple, [Src,#b_literal{val=Size},#b_literal{val=Tag}], Ts, _Ds) ->
+ case get_type(Src, Ts) of
+ #t_tuple{exact=true,size=Size,elements=[Tag]} ->
+ t_atom(true);
+ #t_tuple{exact=true,size=ActualSize,elements=[]} ->
+ if
+ Size =/= ActualSize ->
+ t_atom(false);
+ true ->
+ t_boolean()
+ end;
+ #t_tuple{exact=false} ->
+ t_boolean();
+ any ->
+ t_boolean();
+ _ ->
+ t_atom(false)
+ end;
+type(put_list, _Args, _Ts, _Ds) ->
+ cons;
+type(put_tuple, Args, _Ts, _Ds) ->
+ case Args of
+ [#b_literal{val=First}|_] ->
+ #t_tuple{exact=true,size=length(Args),elements=[First]};
+ _ ->
+ #t_tuple{exact=true,size=length(Args)}
+ end;
+type(succeeded, [#b_var{name=Src}], Ts, Ds) ->
+ case maps:get(Src, Ds) of
+ #b_set{op={bif,Bif},args=BifArgs} ->
+ Types = get_types(BifArgs, Ts),
+ case {Bif,Types} of
+ {byte_size,[{binary,_}]} ->
+ t_atom(true);
+ {bit_size,[{binary,_}]} ->
+ t_atom(true);
+ {map_size,[map]} ->
+ t_atom(true);
+ {'not',[Type]} ->
+ case t_is_boolean(Type) of
+ true -> t_atom(true);
+ false -> t_boolean()
+ end;
+ {size,[{binary,_}]} ->
+ t_atom(true);
+ {tuple_size,[#t_tuple{}]} ->
+ t_atom(true);
+ {_,_} ->
+ t_boolean()
+ end;
+ #b_set{op=get_hd} ->
+ t_atom(true);
+ #b_set{op=get_tl} ->
+ t_atom(true);
+ #b_set{op=get_tuple_element} ->
+ t_atom(true);
+ #b_set{op=wait} ->
+ t_atom(false);
+ #b_set{} ->
+ t_boolean()
+ end;
+type(_, _, _, _) -> any.
+
+arith_op_type(Args, Ts) ->
+ Types = get_types(Args, Ts),
+ foldl(fun(#t_integer{}, unknown) -> t_integer();
+ (#t_integer{}, number) -> number;
+ (#t_integer{}, float) -> float;
+ (#t_integer{}, #t_integer{}) -> t_integer();
+ (float, unknown) -> float;
+ (float, #t_integer{}) -> float;
+ (float, number) -> float;
+ (number, unknown) -> number;
+ (number, #t_integer{}) -> number;
+ (number, float) -> float;
+ (any, _) -> number;
+ (_, any) -> number;
+ (Same, Same) -> Same;
+ (_, _) -> none
+ end, unknown, Types).
+
+%% will_succeed(TestOperation, Type) -> yes|no|maybe.
+%% Test whether TestOperation applied to an argument of type Type
+%% will succeed. Return yes, no, or maybe.
+%%
+%% Type is a type as described in the comment for verified_type/1 at
+%% the very end of this file, but it will *never* be 'any'.
+
+will_succeed(is_atom, Type) ->
+ case Type of
+ #t_atom{} -> yes;
+ _ -> no
+ end;
+will_succeed(is_binary, Type) ->
+ case Type of
+ {binary,U} when U rem 8 =:= 0 -> yes;
+ {binary,_} -> maybe;
+ _ -> no
+ end;
+will_succeed(is_bitstring, Type) ->
+ case Type of
+ {binary,_} -> yes;
+ _ -> no
+ end;
+will_succeed(is_boolean, Type) ->
+ case Type of
+ #t_atom{elements=any} ->
+ maybe;
+ #t_atom{elements=Es} ->
+ case t_is_boolean(Type) of
+ true ->
+ yes;
+ false ->
+ case any(fun is_boolean/1, Es) of
+ true -> maybe;
+ false -> no
+ end
+ end;
+ _ ->
+ no
+ end;
+will_succeed(is_float, Type) ->
+ case Type of
+ float -> yes;
+ number -> maybe;
+ _ -> no
+ end;
+will_succeed(is_integer, Type) ->
+ case Type of
+ #t_integer{} -> yes;
+ number -> maybe;
+ _ -> no
+ end;
+will_succeed(is_list, Type) ->
+ case Type of
+ list -> yes;
+ cons -> yes;
+ nil -> yes;
+ _ -> no
+ end;
+will_succeed(is_map, Type) ->
+ case Type of
+ map -> yes;
+ _ -> no
+ end;
+will_succeed(is_number, Type) ->
+ case Type of
+ float -> yes;
+ #t_integer{} -> yes;
+ number -> yes;
+ _ -> no
+ end;
+will_succeed(is_tuple, Type) ->
+ case Type of
+ #t_tuple{} -> yes;
+ _ -> no
+ end;
+will_succeed(_, _) -> maybe.
+
+
+band_type([#b_literal{val=Int},Other], Ts) when is_integer(Int) ->
+ band_type_1(Int, Other, Ts);
+band_type([Other,#b_literal{val=Int}], Ts) when is_integer(Int) ->
+ band_type_1(Int, Other, Ts);
+band_type([_,_], _) -> t_integer().
+
+band_type_1(Int, OtherSrc, Ts) ->
+ Type = band_type_2(Int, 0),
+ OtherType = get_type(OtherSrc, Ts),
+ meet(Type, OtherType).
+
+band_type_2(N, Bits) when Bits < 64 ->
+ case 1 bsl Bits of
+ P when P =:= N + 1 ->
+ t_integer(0, N);
+ P when P > N + 1 ->
+ t_integer();
+ _ ->
+ band_type_2(N, Bits+1)
+ end;
+band_type_2(_, _) ->
+ %% Negative or large positive number. Give up.
+ t_integer().
+
+bs_match_type([#b_literal{val=Type}|Args]) ->
+ bs_match_type(Type, Args).
+
+bs_match_type(binary, Args) ->
+ [_,_,_,#b_literal{val=U}] = Args,
+ {binary,U};
+bs_match_type(float, _) ->
+ float;
+bs_match_type(integer, Args) ->
+ case Args of
+ [_,
+ #b_literal{val=Flags},
+ #b_literal{val=Size},
+ #b_literal{val=Unit}] when Size * Unit < 64 ->
+ NumBits = Size * Unit,
+ case member(unsigned, Flags) of
+ true ->
+ t_integer(0, (1 bsl NumBits)-1);
+ false ->
+ %% Signed integer. Don't bother.
+ t_integer()
+ end;
+ [_|_] ->
+ t_integer()
+ end;
+bs_match_type(skip, _) ->
+ any;
+bs_match_type(string, _) ->
+ any;
+bs_match_type(utf8, _) ->
+ ?UNICODE_INT;
+bs_match_type(utf16, _) ->
+ ?UNICODE_INT;
+bs_match_type(utf32, _) ->
+ ?UNICODE_INT.
+
+simplify_switch_int(#b_switch{list=List0}=Sw, {Min,Max}) ->
+ List1 = sort(List0),
+ Vs = [V || {#b_literal{val=V},_} <- List1],
+ case eq_ranges(Vs, Min, Max) of
+ true ->
+ {_,LastL} = last(List1),
+ List = droplast(List1),
+ Sw#b_switch{fail=LastL,list=List};
+ false ->
+ Sw
+ end.
+
+eq_ranges([H], H, H) -> true;
+eq_ranges([H|T], H, Max) -> eq_ranges(T, H+1, Max);
+eq_ranges(_, _, _) -> false.
+
+simplify_switch_bool(#b_switch{arg=B,list=List0}=Sw, Ts, Ds) ->
+ List = sort(List0),
+ case List of
+ [{#b_literal{val=false},Fail},{#b_literal{val=true},Succ}] ->
+ simplify_not(#b_br{bool=B,succ=Succ,fail=Fail}, Ts, Ds);
+ [_|_] ->
+ Sw
+ end.
+
+simplify_not(#b_br{bool=#b_var{name=V},succ=Succ,fail=Fail}=Br, Ts, Ds) ->
+ case Ds of
+ #{V:=#b_set{op={bif,'not'},args=[Bool]}} ->
+ case t_is_boolean(get_type(Bool, Ts)) of
+ true ->
+ Br#b_br{bool=Bool,succ=Fail,fail=Succ};
+ false ->
+ Br
+ end;
+ #{} ->
+ Br
+ end.
+
+get_literals(Values, Ts) ->
+ [get_literal_from_type(get_type(Val, Ts)) || Val <- Values].
+
+get_types(Values, Ts) ->
+ [get_type(Val, Ts) || Val <- Values].
+
+-spec get_type(beam_ssa:value(), type_db()) -> type().
+
+get_type(#b_var{name=V}, Ts) ->
+ #{V:=T} = Ts,
+ T;
+get_type(#b_literal{val=Val}, _Ts) ->
+ if
+ is_atom(Val) ->
+ t_atom(Val);
+ is_float(Val) ->
+ float;
+ is_integer(Val) ->
+ t_integer(Val);
+ is_list(Val), Val =/= [] ->
+ cons;
+ is_map(Val) ->
+ map;
+ Val =:= {} ->
+ #t_tuple{exact=true};
+ is_tuple(Val) ->
+ #t_tuple{exact=true,size=tuple_size(Val),
+ elements=[element(1, Val)]};
+ Val =:= [] ->
+ nil;
+ true ->
+ any
+ end.
+
+infer_types(V, Ts, #d{ds=Ds}) ->
+ #{V:=#b_set{op=Op,args=Args}} = Ds,
+ Types = infer_type(Op, Args, Ds),
+ meet_types(Types, Ts).
+
+infer_type({bif,element}, [#b_literal{val=Pos},#b_var{name=Tuple}], _Ds) ->
+ if
+ is_integer(Pos), 1 =< Pos ->
+ [{Tuple,#t_tuple{size=Pos}}];
+ true ->
+ []
+ end;
+infer_type({bif,'=:='}, [#b_var{name=Src},#b_literal{}=Lit], Ds) ->
+ Def = maps:get(Src, Ds),
+ Type = get_type(Lit, #{}),
+ [{Src,Type}|infer_tuple_size(Def, Lit) ++
+ infer_first_element(Def, Lit)];
+infer_type({bif,Bif}, [#b_var{name=Src}]=Args, _Ds) ->
+ case inferred_bif_type(Bif, Args) of
+ any -> [];
+ T -> [{Src,T}]
+ end;
+infer_type({bif,is_map_key}, [_,#b_var{name=Src}], _Ds) ->
+ [{Src,map}];
+infer_type({bif,map_get}, [_,#b_var{name=Src}], _Ds) ->
+ [{Src,map}];
+infer_type(bs_start_match, [#b_var{name=Bin}], _Ds) ->
+ [{Bin,{binary,1}}];
+infer_type(is_nonempty_list, [#b_var{name=Src}], _Ds) ->
+ [{Src,cons}];
+infer_type(is_tagged_tuple, [#b_var{name=Src},#b_literal{val=Size},
+ #b_literal{val=Tag}], _Ds) ->
+ [{Src,#t_tuple{exact=true,size=Size,elements=[Tag]}}];
+infer_type(succeeded, [#b_var{name=Src}], Ds) ->
+ #b_set{op=Op,args=Args} = maps:get(Src, Ds),
+ infer_type(Op, Args, Ds);
+infer_type(_Op, _Args, _Ds) ->
+ [].
+
+%% bif_type(Name, Args) -> Type
+%% Return the return type for the guard BIF or operator Name with
+%% arguments Args.
+%%
+%% Note that that the following BIFs are handle elsewhere:
+%%
+%% band/2
+%% tuple_size/1
+
+bif_type(abs, [_]) -> number;
+bif_type(bit_size, [_]) -> t_integer();
+bif_type(byte_size, [_]) -> t_integer();
+bif_type(ceil, [_]) -> t_integer();
+bif_type(float, [_]) -> float;
+bif_type(floor, [_]) -> t_integer();
+bif_type(is_map_key, [_,_]) -> t_boolean();
+bif_type(length, [_]) -> t_integer();
+bif_type(map_size, [_]) -> t_integer();
+bif_type(round, [_]) -> t_integer();
+bif_type(size, [_]) -> t_integer();
+bif_type(trunc, [_]) -> t_integer();
+bif_type('bnot', [_]) -> t_integer();
+bif_type('bor', [_,_]) -> t_integer();
+bif_type('bsl', [_,_]) -> t_integer();
+bif_type('bsr', [_,_]) -> t_integer();
+bif_type('bxor', [_,_]) -> t_integer();
+bif_type('div', [_,_]) -> t_integer();
+bif_type('rem', [_,_]) -> t_integer();
+bif_type('/', [_,_]) -> float;
+bif_type(Name, Args) ->
+ Arity = length(Args),
+ case erl_internal:new_type_test(Name, Arity) orelse
+ erl_internal:bool_op(Name, Arity) orelse
+ erl_internal:comp_op(Name, Arity) of
+ true ->
+ t_boolean();
+ false ->
+ case erl_internal:arith_op(Name, Arity) of
+ true -> number;
+ false -> any
+ end
+ end.
+
+inferred_bif_type(is_atom, [_]) -> t_atom();
+inferred_bif_type(is_binary, [_]) -> {binary,8};
+inferred_bif_type(is_bitstring, [_]) -> {binary,1};
+inferred_bif_type(is_boolean, [_]) -> t_boolean();
+inferred_bif_type(is_float, [_]) -> float;
+inferred_bif_type(is_integer, [_]) -> t_integer();
+inferred_bif_type(is_list, [_]) -> list;
+inferred_bif_type(is_map, [_]) -> map;
+inferred_bif_type(is_number, [_]) -> number;
+inferred_bif_type(is_tuple, [_]) -> #t_tuple{};
+inferred_bif_type(abs, [_]) -> number;
+inferred_bif_type(bit_size, [_]) -> {binary,1};
+inferred_bif_type(byte_size, [_]) -> {binary,1};
+inferred_bif_type(ceil, [_]) -> number;
+inferred_bif_type(float, [_]) -> number;
+inferred_bif_type(floor, [_]) -> number;
+inferred_bif_type(round, [_]) -> number;
+inferred_bif_type(trunc, [_]) -> number;
+inferred_bif_type(tuple_size, [_]) -> #t_tuple{};
+inferred_bif_type(_, _) -> any.
+
+infer_tuple_size(#b_set{op={bif,tuple_size},args=[#b_var{name=Tuple}]},
+ #b_literal{val=Size}) when is_integer(Size) ->
+ [{Tuple,#t_tuple{exact=true,size=Size}}];
+infer_tuple_size(_, _) -> [].
+
+infer_first_element(#b_set{op=get_tuple_element,
+ args=[#b_var{name=Tuple},#b_literal{val=0}]},
+ #b_literal{val=First}) ->
+ [{Tuple,#t_tuple{size=1,elements=[First]}}];
+infer_first_element(_, _) -> [].
+
+is_math_bif(cos, 1) -> true;
+is_math_bif(cosh, 1) -> true;
+is_math_bif(sin, 1) -> true;
+is_math_bif(sinh, 1) -> true;
+is_math_bif(tan, 1) -> true;
+is_math_bif(tanh, 1) -> true;
+is_math_bif(acos, 1) -> true;
+is_math_bif(acosh, 1) -> true;
+is_math_bif(asin, 1) -> true;
+is_math_bif(asinh, 1) -> true;
+is_math_bif(atan, 1) -> true;
+is_math_bif(atanh, 1) -> true;
+is_math_bif(erf, 1) -> true;
+is_math_bif(erfc, 1) -> true;
+is_math_bif(exp, 1) -> true;
+is_math_bif(log, 1) -> true;
+is_math_bif(log2, 1) -> true;
+is_math_bif(log10, 1) -> true;
+is_math_bif(sqrt, 1) -> true;
+is_math_bif(atan2, 2) -> true;
+is_math_bif(pow, 2) -> true;
+is_math_bif(ceil, 1) -> true;
+is_math_bif(floor, 1) -> true;
+is_math_bif(fmod, 2) -> true;
+is_math_bif(pi, 0) -> true;
+is_math_bif(_, _) -> false.
+
+join_types(Ts0, Ts1) ->
+ if
+ map_size(Ts0) < map_size(Ts1) ->
+ join_types_1(maps:keys(Ts0), Ts1, Ts0);
+ true ->
+ join_types_1(maps:keys(Ts1), Ts0, Ts1)
+ end.
+
+join_types_1([V|Vs], Ts0, Ts1) ->
+ case {Ts0,Ts1} of
+ {#{V:=Same},#{V:=Same}} ->
+ join_types_1(Vs, Ts0, Ts1);
+ {#{V:=T0},#{V:=T1}} ->
+ case join(T0, T1) of
+ T1 ->
+ join_types_1(Vs, Ts0, Ts1);
+ T ->
+ join_types_1(Vs, Ts0, Ts1#{V:=T})
+ end;
+ {#{},#{V:=_}} ->
+ join_types_1(Vs, Ts0, Ts1)
+ end;
+join_types_1([], Ts0, Ts1) ->
+ maps:merge(Ts0, Ts1).
+
+join([T1,T2|Ts]) ->
+ join([join(T1, T2)|Ts]);
+join([T]) -> T.
+
+get_literal_from_type(#t_atom{elements=[Atom]}) ->
+ #b_literal{val=Atom};
+get_literal_from_type(#t_integer{elements={Int,Int}}) ->
+ #b_literal{val=Int};
+get_literal_from_type(nil) ->
+ #b_literal{val=[]};
+get_literal_from_type(_) -> none.
+
+t_atom() ->
+ #t_atom{elements=any}.
+
+t_atom(Atom) when is_atom(Atom) ->
+ #t_atom{elements=[Atom]}.
+
+t_boolean() ->
+ #t_atom{elements=[false,true]}.
+
+t_integer() ->
+ #t_integer{elements=any}.
+
+t_integer(Int) when is_integer(Int) ->
+ #t_integer{elements={Int,Int}}.
+
+t_integer(Min, Max) when is_integer(Min), is_integer(Max) ->
+ #t_integer{elements={Min,Max}}.
+
+t_is_boolean(#t_atom{elements=[F,T]}) ->
+ F =:= false andalso T =:= true;
+t_is_boolean(#t_atom{elements=[B]}) ->
+ is_boolean(B);
+t_is_boolean(_) -> false.
+
+t_tuple_size(#t_tuple{size=Size,exact=false}) ->
+ {at_least,Size};
+t_tuple_size(#t_tuple{size=Size,exact=true}) ->
+ {exact,Size};
+t_tuple_size(_) ->
+ none.
+
+%% join(Type1, Type2) -> Type
+%% Return the "join" of Type1 and Type2. The join is a more general
+%% type than Type1 and Type2. For example:
+%%
+%% join(#t_integer{elements=any}, #t_integer=elements={0,3}}) ->
+%% #t_integer{}
+%%
+%% The join for two different types result in 'any', which is
+%% the top element for our type lattice:
+%%
+%% join(#t_integer{}, map) -> any
+
+-spec join(type(), type()) -> type().
+
+join(T, T) ->
+ verified_type(T);
+join(none, T) ->
+ verified_type(T);
+join(T, none) ->
+ verified_type(T);
+join(any, _) -> any;
+join(_, any) -> any;
+join(#t_atom{elements=[_|_]=Set1}, #t_atom{elements=[_|_]=Set2}) ->
+ Set = ordsets:union(Set1, Set2),
+ case ordsets:size(Set) of
+ Size when Size =< ?ATOM_SET_SIZE ->
+ #t_atom{elements=Set};
+ _Size ->
+ #t_atom{elements=any}
+ end;
+join(#t_atom{elements=any}=T, #t_atom{elements=[_|_]}) -> T;
+join(#t_atom{elements=[_|_]}, #t_atom{elements=any}=T) -> T;
+join({binary,U1}, {binary,U2}) ->
+ {binary,gcd(U1, U2)};
+join(#t_integer{}, #t_integer{}) -> t_integer();
+join(list, cons) -> list;
+join(cons, list) -> list;
+join(nil, cons) -> list;
+join(cons, nil) -> list;
+join(nil, list) -> list;
+join(list, nil) -> list;
+join(#t_integer{}, float) -> number;
+join(float, #t_integer{}) -> number;
+join(#t_integer{}, number) -> number;
+join(number, #t_integer{}) -> number;
+join(float, number) -> number;
+join(number, float) -> number;
+join(#t_tuple{size=Sz,exact=Exact1}, #t_tuple{size=Sz,exact=Exact2}) ->
+ Exact = Exact1 and Exact2,
+ #t_tuple{size=Sz,exact=Exact};
+join(#t_tuple{size=Sz1}, #t_tuple{size=Sz2}) ->
+ #t_tuple{size=min(Sz1, Sz2)};
+join(_T1, _T2) ->
+ %%io:format("~p ~p\n", [_T1,_T2]),
+ any.
+
+gcd(A, B) ->
+ case A rem B of
+ 0 -> B;
+ X -> gcd(B, X)
+ end.
+
+meet_types([{V,T0}|Vs], Ts) ->
+ #{V:=T1} = Ts,
+ T = meet(T0, T1),
+ meet_types(Vs, Ts#{V:=T});
+meet_types([], Ts) -> Ts.
+
+meet([T1,T2|Ts]) ->
+ meet([meet(T1, T2)|Ts]);
+meet([T]) -> T.
+
+%% meet(Type1, Type2) -> Type
+%% Return the "meet" of Type1 and Type2. The meet is a narrower
+%% type than Type1 and Type2. For example:
+%%
+%% meet(#t_integer{elements=any}, #t_integer{elements={0,3}}) ->
+%% #t_integer{elements={0,3}}
+%%
+%% The meet for two different types result in 'none', which is
+%% the bottom element for our type lattice:
+%%
+%% meet(#t_integer{}, map) -> none
+
+-spec meet(type(), type()) -> type().
+
+meet(T, T) ->
+ verified_type(T);
+meet(#t_atom{elements=[_|_]=Set1}, #t_atom{elements=[_|_]=Set2}) ->
+ case ordsets:intersection(Set1, Set2) of
+ [] ->
+ none;
+ [_|_]=Set ->
+ #t_atom{elements=Set}
+ end;
+meet(#t_atom{elements=[_|_]}=T, #t_atom{elements=any}) ->
+ T;
+meet(#t_atom{elements=any}, #t_atom{elements=[_|_]}=T) ->
+ T;
+meet(#t_integer{elements={_,_}}=T, #t_integer{elements=any}) ->
+ T;
+meet(#t_integer{elements=any}, #t_integer{elements={_,_}}=T) ->
+ T;
+meet(#t_integer{elements={Min1,Max1}},
+ #t_integer{elements={Min2,Max2}}) ->
+ #t_integer{elements={max(Min1, Min2),min(Max1, Max2)}};
+meet(#t_integer{}=T, number) -> T;
+meet(float, number) -> float;
+meet(#t_integer{}=T, number) -> T;
+meet(float, number) -> float;
+meet(number, #t_integer{}=T) -> T;
+meet(#t_integer{}=T, number) -> T;
+meet(number, float=T) -> T;
+meet(float=T, number) -> T;
+meet(list, cons) -> cons;
+meet(list, nil) -> nil;
+meet(cons, list) -> cons;
+meet(nil, list) -> nil;
+meet(#t_tuple{}=T1, #t_tuple{}=T2) ->
+ meet_tuples(T1, T2);
+meet({binary,U1}, {binary,U2}) ->
+ {binary,max(U1, U2)};
+meet(any, T) ->
+ verified_type(T);
+meet(T, any) ->
+ verified_type(T);
+meet(_, _) ->
+ %% Inconsistent types. There will be an exception at runtime.
+ none.
+
+meet_tuples(#t_tuple{elements=[E1]}, #t_tuple{elements=[E2]})
+ when E1 =/= E2 ->
+ none;
+meet_tuples(#t_tuple{size=Sz1,exact=true},
+ #t_tuple{size=Sz2,exact=true}) when Sz1 =/= Sz2 ->
+ none;
+meet_tuples(#t_tuple{size=Sz1,exact=Ex1,elements=Es1},
+ #t_tuple{size=Sz2,exact=Ex2,elements=Es2}) ->
+ Size = max(Sz1, Sz2),
+ Exact = Ex1 or Ex2,
+ Es = case {Es1,Es2} of
+ {[],[_|_]} -> Es2;
+ {[_|_],[]} -> Es1;
+ {_,_} -> Es1
+ end,
+ #t_tuple{size=Size,exact=Exact,elements=Es}.
+
+%% verified_type(Type) -> Type
+%% Returns the passed in type if it is one of the defined types.
+%% Crashes if there is anything wrong with the type.
+%%
+%% Here are all possible types:
+%%
+%% any Any Erlang term (top element for the type lattice).
+%%
+%% #t_atom{} Any atom or some specific atoms.
+%% {binary,Unit} Binary/bitstring aligned to unit Unit.
+%% float Floating point number.
+%% #t_integer{} Integer
+%% list Empty or nonempty list.
+%% map Map.
+%% nil Empty list.
+%% cons Cons (nonempty list).
+%% number A number (float or integer).
+%% #t_tuple{} Tuple.
+%%
+%% none No type (bottom element for the type lattice).
+
+-spec verified_type(T) -> T when
+ T :: type().
+
+verified_type(any=T) -> T;
+verified_type(none=T) -> T;
+verified_type(#t_atom{elements=any}=T) -> T;
+verified_type(#t_atom{elements=[_|_]}=T) -> T;
+verified_type({binary,U}=T) when is_integer(U) -> T;
+verified_type(#t_integer{elements=any}=T) -> T;
+verified_type(#t_integer{elements={Min,Max}}=T)
+ when is_integer(Min), is_integer(Max) -> T;
+verified_type(list=T) -> T;
+verified_type(map=T) -> T;
+verified_type(nil=T) -> T;
+verified_type(cons=T) -> T;
+verified_type(number=T) -> T;
+verified_type(#t_tuple{}=T) -> T;
+verified_type(float=T) -> T.
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index af1e719206..686d314c2d 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -193,8 +193,7 @@ bif_to_test('=:=', [C,A], Fail) when ?is_const(C) ->
bif_to_test('=:=', [_,_]=Ops, Fail) -> {test,is_eq_exact,Fail,Ops};
bif_to_test('=/=', [C,A], Fail) when ?is_const(C) ->
{test,is_ne_exact,Fail,[A,C]};
-bif_to_test('=/=', [_,_]=Ops, Fail) -> {test,is_ne_exact,Fail,Ops};
-bif_to_test(is_record, [_,_,_]=Ops, Fail) -> {test,is_record,Fail,Ops}.
+bif_to_test('=/=', [_,_]=Ops, Fail) -> {test,is_ne_exact,Fail,Ops}.
%% is_pure_test({test,Op,Fail,Ops}) -> true|false.
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 5fdea23a26..3a835cfb2f 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -31,6 +31,9 @@
%% Erlc interface.
-export([compile/3,compile_beam/3,compile_asm/3,compile_core/3]).
+%% Utility functions for compiler passes.
+-export([run_sub_passes/2]).
+
-export_type([option/0]).
-include("erl_compile.hrl").
@@ -39,6 +42,8 @@
-import(lists, [member/2,reverse/1,reverse/2,keyfind/3,last/1,
map/2,flatmap/2,foreach/2,foldr/3,any/2]).
+-define(SUB_PASS_TIMES, compile__sub_pass_times).
+
%%----------------------------------------------------------------------
-type abstract_code() :: [erl_parse:abstract_form()].
@@ -64,6 +69,7 @@
-type err_ret() :: 'error' | {'error', errors(), warnings()}.
-type comp_ret() :: mod_ret() | bin_ret() | err_ret().
+
%%----------------------------------------------------------------------
%%
@@ -143,6 +149,30 @@ noenv_output_generated(Opts) ->
env_compiler_options() -> env_default_opts().
+
+%%%
+%%% Run sub passes from a compiler pass.
+%%%
+
+-spec run_sub_passes([term()], term()) -> term().
+
+run_sub_passes(Ps, St) ->
+ case get(?SUB_PASS_TIMES) of
+ undefined ->
+ Runner = fun(_Name, Run, S) -> Run(S) end,
+ run_sub_passes_1(Ps, Runner, St);
+ Times when is_list(Times) ->
+ Runner = fun(Name, Run, S0) ->
+ T1 = erlang:monotonic_time(),
+ S = Run(S0),
+ T2 = erlang:monotonic_time(),
+ put(?SUB_PASS_TIMES,
+ [{Name,T2-T1}|get(?SUB_PASS_TIMES)]),
+ S
+ end,
+ run_sub_passes_1(Ps, Runner, St)
+ end.
+
%%
%% Local functions
%%
@@ -219,22 +249,24 @@ expand_opt(report, Os) ->
expand_opt(return, Os) ->
[return_errors,return_warnings|Os];
expand_opt(r16, Os) ->
- [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+ expand_opt_before_21(Os);
expand_opt(r17, Os) ->
- [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+ expand_opt_before_21(Os);
expand_opt(r18, Os) ->
- [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+ expand_opt_before_21(Os);
expand_opt(r19, Os) ->
- [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+ expand_opt_before_21(Os);
expand_opt(r20, Os) ->
- [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+ expand_opt_before_21(Os);
+expand_opt(r21, Os) ->
+ Os;
expand_opt({debug_info_key,_}=O, Os) ->
[encrypt_debug_info,O|Os];
-expand_opt(no_float_opt, Os) ->
- %%Turn off the entire type optimization pass.
- [no_topt|Os];
expand_opt(O, Os) -> [O|Os].
+expand_opt_before_21(Os) ->
+ [no_get_hd_tl,no_ssa_opt_record,no_utf8_atoms|Os].
+
%% format_error(ErrorDescriptor) -> string()
-spec format_error(term()) -> iolist().
@@ -387,17 +419,57 @@ fold_comp([{Name,Pass}|Ps], Run, Code0, St0) ->
end;
fold_comp([], _Run, Code, St) -> {ok,Code,St}.
+run_sub_passes_1([{Name,Run}|Ps], Runner, St0)
+ when is_atom(Name), is_function(Run, 1) ->
+ try Runner(Name, Run, St0) of
+ St ->
+ run_sub_passes_1(Ps, Runner, St)
+ catch
+ C:E:Stk ->
+ io:format("Sub pass ~s\n", [Name]),
+ erlang:raise(C, E, Stk)
+ end;
+run_sub_passes_1([], _, St) -> St.
+
run_tc({Name,Fun}, Code, St) ->
+ put(?SUB_PASS_TIMES, []),
T1 = erlang:monotonic_time(),
Val = (catch Fun(Code, St)),
T2 = erlang:monotonic_time(),
- Elapsed = erlang:convert_time_unit(T2 - T1, native, millisecond),
+ Times = erase(?SUB_PASS_TIMES),
+ Elapsed = erlang:convert_time_unit(T2 - T1, native, microsecond),
Mem0 = erts_debug:flat_size(Val)*erlang:system_info(wordsize),
Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])),
io:format(" ~-30s: ~10.3f s ~12s\n",
- [Name,Elapsed/1000,Mem]),
+ [Name,Elapsed/1000000,Mem]),
+ print_times(Times, Name),
Val.
+print_times(Times0, Name) ->
+ Fam0 = sofs:relation(Times0),
+ Fam1 = sofs:rel2fam(Fam0),
+ Fam2 = sofs:to_external(Fam1),
+ Fam3 = [{W,lists:sum(Times)} || {W,Times} <- Fam2],
+ Fam = reverse(lists:keysort(2, Fam3)),
+ Total = case lists:sum([T || {_,T} <- Fam]) of
+ 0 -> 1;
+ Total0 -> Total0
+ end,
+ case Fam of
+ [] ->
+ ok;
+ [_|_] ->
+ io:format(" %% Sub passes of ~s from slowest to fastest:\n", [Name]),
+ print_times_1(Fam, Total)
+ end.
+
+print_times_1([{Name,T}|Ts], Total) ->
+ Elapsed = erlang:convert_time_unit(T, native, microsecond),
+ io:format(" ~-27s: ~10.3f s ~3w %\n",
+ [Name,Elapsed/1000000,round(100*T/Total)]),
+ print_times_1(Ts, Total);
+print_times_1([], _Total) -> ok.
+
run_eprof({Name,Fun}, Code, Name, St) ->
io:format("~p: Running eprof\n", [Name]),
c:appcall(tools, eprof, start_profiling, [[self()]]),
@@ -741,8 +813,20 @@ kernel_passes() ->
?pass(v3_kernel),
{iff,dkern,{listing,"kernel"}},
{iff,'to_kernel',{done,"kernel"}},
- {pass,v3_codegen},
- {iff,dcg,{listing,"codegen"}}
+ {pass,beam_kernel_to_ssa},
+ {iff,dssa,{listing,"ssa"}},
+ {iff,ssalint,{pass,beam_ssa_lint}},
+ {unless,no_ssa_opt,{pass,beam_ssa_opt}},
+ {iff,dssaopt,{listing,"ssaopt"}},
+ {iff,ssalint,{pass,beam_ssa_lint}},
+ {unless,no_recv_opt,{pass,beam_ssa_recv}},
+ {iff,drecv,{listing,"recv"}},
+ {pass,beam_ssa_pre_codegen},
+ {iff,dprecg,{listing,"precodegen"}},
+ {iff,ssalint,{pass,beam_ssa_lint}},
+ {pass,beam_ssa_codegen},
+ {iff,dcg,{listing,"codegen"}},
+ {iff,doldcg,{listing,"codegen"}}
| asm_passes()].
asm_passes() ->
@@ -1958,8 +2042,15 @@ pre_load() ->
beam_except,
beam_flatten,
beam_jump,
+ beam_kernel_to_ssa,
beam_opcodes,
beam_peep,
+ beam_ssa,
+ beam_ssa_codegen,
+ beam_ssa_opt,
+ beam_ssa_pre_codegen,
+ beam_ssa_recv,
+ beam_ssa_type,
beam_split,
beam_trim,
beam_utils,
@@ -1980,7 +2071,6 @@ pre_load() ->
sys_core_bsm,
sys_core_dsetel,
sys_core_fold,
- v3_codegen,
v3_core,
v3_kernel],
_ = code:ensure_modules_loaded(L),
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 9fa5a1c6ea..74529f7fef 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -33,9 +33,18 @@
beam_except,
beam_flatten,
beam_jump,
+ beam_kernel_to_ssa,
beam_listing,
beam_opcodes,
beam_peep,
+ beam_ssa,
+ beam_ssa_codegen,
+ beam_ssa_lint,
+ beam_ssa_opt,
+ beam_ssa_pp,
+ beam_ssa_pre_codegen,
+ beam_ssa_recv,
+ beam_ssa_type,
beam_split,
beam_trim,
beam_utils,
@@ -61,7 +70,6 @@
sys_core_fold_lists,
sys_core_inline,
sys_pre_attributes,
- v3_codegen,
v3_core,
v3_kernel,
v3_kernel_pp
diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl
index aef0b6cc9f..fe8e252e5a 100644
--- a/lib/compiler/src/v3_kernel.erl
+++ b/lib/compiler/src/v3_kernel.erl
@@ -2043,9 +2043,6 @@ get_match(#k_cons{}, St0) ->
get_match(#k_binary{}, St0) ->
{[V]=Mes,St1} = new_vars(1, St0),
{#k_binary{segs=V},Mes,St1};
-get_match(#k_bin_seg{size=#k_atom{val=all},next={k_bin_end,[]}}=Seg, St0) ->
- {[S]=Vars,St1} = new_vars(1, St0),
- {Seg#k_bin_seg{seg=S,next=[]},Vars,St1};
get_match(#k_bin_seg{}=Seg, St0) ->
{[S,N0],St1} = new_vars(2, St0),
N = set_kanno(N0, [no_usage]),
@@ -2073,9 +2070,6 @@ new_clauses(Cs0, U, St) ->
#k_cons{hd=H,tl=T} -> [H,T|As];
#k_tuple{es=Es} -> Es ++ As;
#k_binary{segs=E} -> [E|As];
- #k_bin_seg{size=#k_atom{val=all},
- seg=S,next={k_bin_end,[]}} ->
- [S|As];
#k_bin_seg{seg=S,next=N} ->
[S,N|As];
#k_bin_int{next=N} ->
@@ -2374,9 +2368,10 @@ uexpr(#k_try{anno=A,arg=A0,vars=Vs,body=B0,evars=Evs,handler=H0},
true ->
{[#k_var{name=X}],#k_var{name=X}} = {Vs,B0}, %Assertion.
#k_atom{val=false} = H0, %Assertion.
- {A1,Bu,St1} = uexpr(A0, Br, St0),
+ {Avs,St1} = new_vars(length(Rs0), St0),
+ {A1,Bu,St} = uexpr(A0, {break,Avs}, St1),
{#k_protected{anno=#k{us=Bu,ns=lit_list_vars(Rs0),a=A},
- arg=A1,ret=Rs0},Bu,St1};
+ arg=A1,ret=Rs0,inner=Avs},Bu,St};
false ->
{Avs,St1} = new_vars(length(Vs), St0),
{A1,Au,St2} = ubody(A0, {break,Avs}, St1),
diff --git a/lib/compiler/src/v3_kernel.hrl b/lib/compiler/src/v3_kernel.hrl
index e6f0d3c1f7..e26360a6da 100644
--- a/lib/compiler/src/v3_kernel.hrl
+++ b/lib/compiler/src/v3_kernel.hrl
@@ -66,7 +66,7 @@
-record(k_receive_next, {anno=[]}).
-record(k_try, {anno=[],arg,vars,body,evars,handler,ret=[]}).
-record(k_try_enter, {anno=[],arg,vars,body,evars,handler}).
--record(k_protected, {anno=[],arg,ret=[]}).
+-record(k_protected, {anno=[],arg,ret=[],inner}).
-record(k_catch, {anno=[],body,ret=[]}).
-record(k_guard_match, {anno=[],vars,body,ret=[]}).
diff --git a/lib/compiler/test/Makefile b/lib/compiler/test/Makefile
index da5d207db9..2a5004aa4c 100644
--- a/lib/compiler/test/Makefile
+++ b/lib/compiler/test/Makefile
@@ -14,6 +14,7 @@ MODULES= \
beam_except_SUITE \
beam_jump_SUITE \
beam_reorder_SUITE \
+ beam_ssa_SUITE \
beam_type_SUITE \
beam_utils_SUITE \
bif_SUITE \
@@ -52,6 +53,7 @@ NO_OPT= \
beam_except \
beam_jump \
beam_reorder \
+ beam_ssa \
beam_type \
beam_utils \
bif \
@@ -75,6 +77,7 @@ INLINE= \
andor \
apply \
beam_block \
+ beam_ssa \
beam_utils \
bif \
bs_bincomp \
@@ -124,7 +127,7 @@ RELSYSDIR = $(RELEASE_PATH)/compiler_test
# ----------------------------------------------------
ERL_MAKE_FLAGS +=
-ERL_COMPILE_FLAGS += +clint +clint0
+ERL_COMPILE_FLAGS += +clint +clint0 +ssalint
EBIN = .
@@ -135,7 +138,8 @@ EBIN = .
make_emakefile: $(NO_OPT_ERL_FILES) $(POST_OPT_ERL_FILES) $(INLINE_ERL_FILES)
$(ERL_TOP)/make/make_emakefile $(ERL_COMPILE_FLAGS) -o$(EBIN) $(MODULES) \
> $(EMAKEFILE)
- $(ERL_TOP)/make/make_emakefile +no_copt +no_postopt $(ERL_COMPILE_FLAGS) \
+ $(ERL_TOP)/make/make_emakefile +no_copt +no_postopt \
+ +no_ssa_opt +no_recv_opt $(ERL_COMPILE_FLAGS) \
-o$(EBIN) $(NO_OPT_MODULES) >> $(EMAKEFILE)
$(ERL_TOP)/make/make_emakefile +no_copt $(ERL_COMPILE_FLAGS) \
-o$(EBIN) $(POST_OPT_MODULES) >> $(EMAKEFILE)
diff --git a/lib/compiler/test/beam_ssa_SUITE.erl b/lib/compiler/test/beam_ssa_SUITE.erl
new file mode 100644
index 0000000000..0356c99c5a
--- /dev/null
+++ b/lib/compiler/test/beam_ssa_SUITE.erl
@@ -0,0 +1,290 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(beam_ssa_SUITE).
+
+-export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1,
+ init_per_group/2,end_per_group/2,
+ calls/1,tuple_matching/1,recv/1,maps/1]).
+
+suite() -> [{ct_hooks,[ts_install_cth]}].
+
+all() ->
+ [{group,p}].
+
+groups() ->
+ [{p,test_lib:parallel(),
+ [tuple_matching,
+ calls,
+ recv,
+ maps
+ ]}].
+
+init_per_suite(Config) ->
+ test_lib:recompile(?MODULE),
+ Config.
+
+end_per_suite(_Config) ->
+ ok.
+
+init_per_group(_GroupName, Config) ->
+ Config.
+
+end_per_group(_GroupName, Config) ->
+ Config.
+
+calls(Config) ->
+ Ret = {return,value,Config},
+ Ret = fun_call(fun(42) -> ok end, Ret),
+ Ret = apply_fun(fun(a, b) -> ok end, [a,b], Ret),
+ Ret = apply_mfa(test_lib, id, [anything], Ret),
+ {'EXIT',{badarg,_}} = (catch call_error()),
+ {'EXIT',{badarg,_}} = (catch call_error(42)),
+ 5 = start_it([erlang,length,1,2,3,4,5]),
+ ok.
+
+fun_call(Fun, X0) ->
+ X = id(X0),
+ Fun(42),
+ X.
+
+apply_fun(Fun, Args, X0) ->
+ X = id(X0),
+ apply(Fun, Args),
+ X.
+
+apply_mfa(Mod, Name, Args, X0) ->
+ X = id(X0),
+ apply(Mod, Name, Args),
+ X.
+
+call_error() ->
+ error(badarg),
+ ok.
+
+call_error(I) ->
+ <<I:(-8)>>,
+ ok.
+
+start_it([_|_]=MFA) ->
+ case MFA of
+ [M,F|Args] -> M:F(Args)
+ end.
+
+tuple_matching(_Config) ->
+ do_tuple_matching({tag,42}).
+
+do_tuple_matching(Arg) ->
+ Res = do_tuple_matching_1(Arg),
+ Res = do_tuple_matching_2(Arg),
+ Res = do_tuple_matching_3(Arg),
+ Res.
+
+do_tuple_matching_1({tag,V}) ->
+ {ok,V}.
+
+do_tuple_matching_2(Tuple) when is_tuple(Tuple) ->
+ Size = tuple_size(Tuple),
+ if
+ Size =:= 2 ->
+ {ok,element(2, Tuple)}
+ end.
+
+do_tuple_matching_3(Tuple) when is_tuple(Tuple) ->
+ Size = tuple_size(Tuple),
+ if
+ Size =:= 2 ->
+ 2 = id(Size),
+ {ok,element(2, Tuple)}
+ end.
+
+-record(reporter_state, {res,run_config}).
+-record(run_config, {report_interval=0}).
+
+recv(_Config) ->
+ Parent = self(),
+
+ %% Test sync_wait_mon/2.
+ Succ = fun() -> Parent ! {ack,self(),{result,42}} end,
+ {result,42} = sync_wait_mon(spawn_monitor(Succ), infinity),
+
+ Down = fun() -> exit(down) end,
+ {error,down} = sync_wait_mon(spawn_monitor(Down), infinity),
+
+ Exit = fun() ->
+ Self = self(),
+ spawn(fun() -> exit(Self, kill_me) end),
+ receive _ -> ok end
+ end,
+ {error,kill_me} = sync_wait_mon(spawn_monitor(Exit), infinity),
+
+ Timeout = fun() -> receive _ -> ok end end,
+ {error,timeout} = sync_wait_mon(spawn_monitor(Timeout), 0),
+
+ %% Test reporter_loop/1.
+ {a,Parent} = reporter_loop(#reporter_state{res={a,Parent},
+ run_config=#run_config{}}),
+
+ %% Test bad_sink/0.
+ bad_sink(),
+
+ %% Test tricky_recv_1/0.
+ self() ! 1,
+ a = tricky_recv_1(),
+ self() ! 2,
+ b = tricky_recv_1(),
+
+ %% Test tricky_recv_2/0.
+ self() ! 1,
+ {1,yes} = tricky_recv_2(),
+ self() ! 2,
+ {2,maybe} = tricky_recv_2(),
+
+ %% Test 'receive after infinity' in try/catch.
+ Pid = spawn(fun recv_after_inf_in_try/0),
+ exit(Pid, done),
+
+ %% Test tricky_recv_3().
+ self() ! {{self(),r0},{1,42,"name"}},
+ {Parent,r0,[<<1:32,1:8,42:8>>,"name",0]} = tricky_recv_3(),
+ self() ! {{self(),r1},{2,99,<<"data">>}},
+ {Parent,r1,<<1:32,2:8,99:8,"data">>} = tricky_recv_3(),
+
+ %% Test tricky_recv_4().
+ self() ! {[self(),r0],{1,42,"name"}},
+ {Parent,r0,[<<1:32,1:8,42:8>>,"name",0]} = tricky_recv_4(),
+ self() ! {[self(),r1],{2,99,<<"data">>}},
+ {Parent,r1,<<1:32,2:8,99:8,"data">>} = tricky_recv_4(),
+
+ ok.
+
+sync_wait_mon({Pid, Ref}, Timeout) ->
+ receive
+ {ack,Pid,Return} ->
+ erlang:demonitor(Ref, [flush]),
+ Return;
+ {'DOWN',Ref,_Type,Pid,Reason} ->
+ {error,Reason};
+ {'EXIT',Pid,Reason} ->
+ erlang:demonitor(Ref, [flush]),
+ {error,Reason}
+ after Timeout ->
+ erlang:demonitor(Ref, [flush]),
+ exit(Pid, kill),
+ {error,timeout}
+ end.
+
+reporter_loop(State) ->
+ RC = State#reporter_state.run_config,
+ receive after RC#run_config.report_interval ->
+ State#reporter_state.res
+ end.
+
+bad_sink() ->
+ {ok,Pid} = my_spawn(self()),
+ %% The get_tuple_element instruction for the matching
+ %% above was sinked into the receive loop. That will
+ %% not work (and would be bad for performance if it
+ %% would work).
+ receive
+ {ok,Pid} ->
+ ok;
+ error ->
+ exit(failed)
+ end,
+ exit(Pid, kill).
+
+my_spawn(Parent) ->
+ Pid = spawn(fun() ->
+ Parent ! {ok,self()},
+ receive _ -> ok end
+ end),
+ {ok,Pid}.
+
+tricky_recv_1() ->
+ receive
+ X=1 ->
+ id(42),
+ a;
+ X=2 ->
+ b
+ end,
+ case X of
+ 1 -> a;
+ 2 -> b
+ end.
+
+tricky_recv_2() ->
+ receive
+ X=1 ->
+ Y = case id(X) of
+ 1 -> yes;
+ _ -> no
+ end,
+ a;
+ X=2 ->
+ Y = maybe,
+ b
+ end,
+ {X,Y}.
+
+recv_after_inf_in_try() ->
+ try
+ %% Used to crash beam_kernel_to_ssa.
+ receive after infinity -> ok end
+ catch
+ _A:_B ->
+ receive after infinity -> ok end
+ end.
+
+tricky_recv_3() ->
+ {Pid, R, Request} =
+ receive
+ {{Pid0,R0}, {1, Proto0, Name0}} ->
+ {Pid0, R0,
+ [<<1:32, 1:8, Proto0:8>>,Name0,0]};
+ {{Pid1,R1}, {2, Proto1, Data1}} ->
+ {Pid1, R1,
+ <<1:32, 2:8, Proto1:8, Data1/binary>>}
+ end,
+ id({Pid,R,Request}).
+
+tricky_recv_4() ->
+ {Pid, R, Request} =
+ receive
+ {[Pid0,R0], {1, Proto0, Name0}} ->
+ {Pid0, R0,
+ [<<1:32, 1:8, Proto0:8>>,Name0,0]};
+ {[Pid1,R1], {2, Proto1, Data1}} ->
+ {Pid1, R1,
+ <<1:32, 2:8, Proto1:8, Data1/binary>>}
+ end,
+ id({Pid,R,Request}).
+
+maps(_Config) ->
+ {'EXIT',{{badmatch,#{}},_}} = (catch maps_1(any)),
+ ok.
+
+maps_1(K) ->
+ _ = id(42),
+ #{K:=V} = #{},
+ V.
+
+%% The identity function.
+id(I) -> I.
diff --git a/lib/compiler/test/beam_type_SUITE.erl b/lib/compiler/test/beam_type_SUITE.erl
index 061076b3ff..caf43dad40 100644
--- a/lib/compiler/test/beam_type_SUITE.erl
+++ b/lib/compiler/test/beam_type_SUITE.erl
@@ -23,7 +23,8 @@
init_per_group/2,end_per_group/2,
integers/1,coverage/1,booleans/1,setelement/1,cons/1,
tuple/1,record_float/1,binary_float/1,float_compare/1,
- arity_checks/1,elixir_binaries/1,find_best/1]).
+ arity_checks/1,elixir_binaries/1,find_best/1,
+ test_size/1]).
suite() -> [{ct_hooks,[ts_install_cth]}].
@@ -43,7 +44,8 @@ groups() ->
float_compare,
arity_checks,
elixir_binaries,
- find_best
+ find_best,
+ test_size
]}].
init_per_suite(Config) ->
@@ -177,11 +179,50 @@ setelement(_Config) ->
cons(_Config) ->
[did] = cons(assigned, did),
+
+ true = cons_is_empty_list([]),
+ false = cons_is_empty_list([a]),
+
+ false = cons_not(true),
+ true = cons_not(false),
+
+ {$a,"bc"} = cons_hdtl(true),
+ {$d,"ef"} = cons_hdtl(false),
ok.
cons(assigned, Instrument) ->
[Instrument] = [did].
+cons_is_empty_list(L) ->
+ Cons = case L of
+ [] -> "true";
+ _ -> "false"
+ end,
+ id(1),
+ case Cons of
+ "true" -> true;
+ "false" -> false
+ end.
+
+cons_not(B) ->
+ Cons = case B of
+ true -> "true";
+ false -> "false"
+ end,
+ id(1),
+ case Cons of
+ "true" -> false;
+ "false" -> true
+ end.
+
+cons_hdtl(B) ->
+ Cons = case B of
+ true -> "abc";
+ false -> "def"
+ end,
+ id(1),
+ {id(hd(Cons)),id(tl(Cons))}.
+
tuple(_Config) ->
{'EXIT',{{badmatch,{necessary}},_}} = (catch do_tuple()),
ok.
@@ -320,6 +361,15 @@ find_best([], <<"a">>) ->
find_best([], nil) ->
{error,<<"should not get here">>}.
+test_size(Config) ->
+ 2 = do_test_size({a,b}),
+ 4 = do_test_size(<<42:32>>),
+ ok.
+
+do_test_size(Term) when is_tuple(Term) ->
+ size(Term);
+do_test_size(Term) when is_binary(Term) ->
+ size(Term).
id(I) ->
I.
diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl
index 7814738449..b4277f0705 100644
--- a/lib/compiler/test/bs_match_SUITE.erl
+++ b/lib/compiler/test/bs_match_SUITE.erl
@@ -248,6 +248,12 @@ bin_tail(Config) when is_list(Config) ->
ok = bin_tail_e(<<2:2,1:1,1:5,42:64>>),
error = bin_tail_e(<<3:2,1:1,1:5,42:64>>),
error = bin_tail_e(<<>>),
+
+ MD5 = erlang:md5(<<42>>),
+ <<"abc">> = bin_tail_f(<<MD5/binary,"abc">>, MD5, 3),
+ error = bin_tail_f(<<MD5/binary,"abc">>, MD5, 999),
+ {'EXIT',{_,_}} = (catch bin_tail_f(<<0:16/unit:8>>, MD5, 0)),
+
ok.
bin_tail_c(Bin, Offset) ->
@@ -304,6 +310,14 @@ bin_tail_e_var(Bin) ->
<<2:2,_:1,1:5,Tail/binary>> -> Tail;
_ -> error
end.
+
+bin_tail_f(Bin, MD5, Size) ->
+ case Bin of
+ <<MD5:16/binary, Tail:Size/binary>> ->
+ Tail;
+ <<MD5:16/binary, _/binary>> ->
+ error
+ end.
save_restore(Config) when is_list(Config) ->
0 = save_restore_1(<<0:2,42:6>>),
@@ -455,6 +469,15 @@ unit(Config) when is_list(Config) ->
127 = peek7(<<127:7>>),
100 = peek7(<<100:7,19:7>>),
fc(peek7, [<<1,2>>], catch peek7(<<1,2>>)),
+
+ 1 = unit_opt(1, -1),
+ 8 = unit_opt(8, -1),
+
+ <<1:32,"abc">> = unit_opt_2(<<1:32,"abc">>),
+ <<"def">> = unit_opt_2(<<2:32,"def">>),
+ {'EXIT',_} = (catch unit_opt_2(<<1:32,33:7>>)),
+ {'EXIT',_} = (catch unit_opt_2(<<2:32,55:7>>)),
+
ok.
peek1(<<B:8,_/bitstring>>) -> B.
@@ -465,6 +488,27 @@ peek8(<<B:8,_/binary>>) -> B.
peek16(<<B:16,_/binary-unit:16>>) -> B.
+unit_opt(U, X) ->
+ %% Cover type analysis in beam_ssa_type.
+ Bin = case U of
+ 1 -> <<X:7>>;
+ 8 -> <<X>>
+ end,
+ %% The type of Bin will be set to {binary,gcd(1, 8)}.
+ case Bin of
+ <<_/binary-unit:8>> -> 8;
+ <<_/binary-unit:1>> -> 1
+ end.
+
+unit_opt_2(<<St:32,KO/binary>> = Bin0) ->
+ Bin = if
+ St =:= 1 ->
+ Bin0;
+ St =:= 2 ->
+ <<KO/binary>>
+ end,
+ id(Bin).
+
shared_sub_bins(Config) when is_list(Config) ->
{15,[<<>>,<<5>>,<<4,5>>,<<3,4,5>>,<<2,3,4,5>>]} = sum(<<1,2,3,4,5>>, [], 0),
ok.
diff --git a/lib/compiler/test/compilation_SUITE.erl b/lib/compiler/test/compilation_SUITE.erl
index 3ba3ce7cdf..def7a60f45 100644
--- a/lib/compiler/test/compilation_SUITE.erl
+++ b/lib/compiler/test/compilation_SUITE.erl
@@ -170,7 +170,7 @@ try_it(Module, Conf) ->
atom_to_list(Module)),
Out = proplists:get_value(priv_dir,Conf),
io:format("Compiling: ~s\n", [Src]),
- CompRc0 = compile:file(Src, [clint0,clint,{outdir,Out},report,
+ CompRc0 = compile:file(Src, [clint0,clint,ssalint,{outdir,Out},report,
bin_opt_info|OtherOpts]),
io:format("Result: ~p\n",[CompRc0]),
{ok,_Mod} = CompRc0,
@@ -189,7 +189,7 @@ try_it(Module, Conf) ->
ct:timetrap(Timetrap),
io:format("Compiling (with old inliner): ~s\n", [Src]),
- CompRc2 = compile:file(Src, [clint,
+ CompRc2 = compile:file(Src, [clint,ssalint,
{outdir,Out},report,bin_opt_info,
{inline,1000}|OtherOpts]),
io:format("Result: ~p\n",[CompRc2]),
@@ -355,7 +355,7 @@ compile_compiler(Files, OutDir, Version, InlineOpts) ->
io:format("~ts", [code:which(compile)]),
io:format("Compiling ~s into ~ts", [Version,OutDir]),
Opts = [report,
- clint0,clint,
+ clint0,clint,ssalint,
bin_opt_info,
{outdir,OutDir},
{d,'COMPILER_VSN',"\""++Version++"\""},
diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl
index 8056982d8e..85f0b7dc46 100644
--- a/lib/compiler/test/compile_SUITE.erl
+++ b/lib/compiler/test/compile_SUITE.erl
@@ -385,6 +385,9 @@ do_file_listings(DataDir, PrivDir, [File|Files]) ->
do_listing(Simple, TargetDir, dcbsm, ".core_bsm"),
do_listing(Simple, TargetDir, dsetel, ".dsetel"),
do_listing(Simple, TargetDir, dkern, ".kernel"),
+ do_listing(Simple, TargetDir, dssa, ".ssa"),
+ do_listing(Simple, TargetDir, dssaopt, ".ssaopt"),
+ do_listing(Simple, TargetDir, dprecg, ".precodegen"),
do_listing(Simple, TargetDir, dcg, ".codegen"),
do_listing(Simple, TargetDir, dblk, ".block"),
do_listing(Simple, TargetDir, dexcept, ".except"),
@@ -423,6 +426,9 @@ listings_big(Config) when is_list(Config) ->
do_listing(Big, TargetDir, 'E'),
do_listing(Big, TargetDir, 'P'),
do_listing(Big, TargetDir, dkern, ".kernel"),
+ do_listing(Big, TargetDir, dssa, ".ssa"),
+ do_listing(Big, TargetDir, dssaopt, ".ssaopt"),
+ do_listing(Big, TargetDir, dprecg, ".precodegen"),
do_listing(Big, TargetDir, to_dis, ".dis"),
TargetNoext = filename:rootname(Target, code:objfile_extension()),
@@ -919,7 +925,7 @@ do_core_pp_1(M, A, Outdir) ->
ok = file:delete(CoreFile),
%% Compile as usual (including optimizations).
- compile_forms(M, Core, [clint,from_core,binary]),
+ compile_forms(M, Core, [clint,ssalint,from_core,binary]),
%% Don't optimize to test that we are not dependent
%% on the Core Erlang optmimization passes.
@@ -928,7 +934,7 @@ do_core_pp_1(M, A, Outdir) ->
%% records; if sys_core_fold was run it would fix
%% that; if sys_core_fold was not run v3_kernel would
%% crash.)
- compile_forms(M, Core, [clint,from_core,no_copt,binary]),
+ compile_forms(M, Core, [clint,ssalint,from_core,no_copt,binary]),
ok.
@@ -1241,21 +1247,14 @@ do_opt_guards_fun([_|Is]) ->
do_opt_guards_fun(Is);
do_opt_guards_fun([]) -> [].
-is_exception(bs_match_SUITE, {matching_and_andalso_2,2}) -> true;
-is_exception(bs_match_SUITE, {matching_and_andalso_3,2}) -> true;
is_exception(guard_SUITE, {'-complex_not/1-fun-4-',1}) -> true;
is_exception(guard_SUITE, {'-complex_not/1-fun-5-',1}) -> true;
-is_exception(guard_SUITE, {basic_andalso_orelse,1}) -> true;
is_exception(guard_SUITE, {bad_guards,1}) -> true;
is_exception(guard_SUITE, {bad_guards_2,2}) -> true;
is_exception(guard_SUITE, {bad_guards_3,2}) -> true;
-is_exception(guard_SUITE, {cqlc,4}) -> true;
is_exception(guard_SUITE, {csemi7,3}) -> true;
-is_exception(guard_SUITE, {misc,1}) -> true;
is_exception(guard_SUITE, {nested_not_2b,4}) -> true;
is_exception(guard_SUITE, {tricky_1,2}) -> true;
-is_exception(map_SUITE, {map_guard_update,2}) -> true;
-is_exception(map_SUITE, {map_guard_update_variables,3}) -> true;
is_exception(_, _) -> false.
sys_pre_attributes(Config) ->
@@ -1477,18 +1476,18 @@ bc_options(Config) ->
101 = highest_opcode(DataDir, small_float, [no_get_hd_tl,no_line_info]),
103 = highest_opcode(DataDir, big,
- [no_get_hd_tl,no_record_opt,
+ [no_get_hd_tl,no_ssa_opt_record,
no_line_info,no_stack_trimming]),
125 = highest_opcode(DataDir, small_float,
- [no_get_hd_tl,no_line_info,no_float_opt]),
+ [no_get_hd_tl,no_line_info,no_ssa_opt_float]),
132 = highest_opcode(DataDir, small,
- [no_get_hd_tl,no_record_opt,no_float_opt,no_line_info]),
+ [no_get_hd_tl,no_ssa_opt_record,no_ssa_opt_float,no_line_info]),
- 136 = highest_opcode(DataDir, big, [no_get_hd_tl,no_record_opt,no_line_info]),
+ 136 = highest_opcode(DataDir, big, [no_get_hd_tl,no_ssa_opt_record,no_line_info]),
- 153 = highest_opcode(DataDir, big, [no_get_hd_tl,no_record_opt]),
+ 153 = highest_opcode(DataDir, big, [no_get_hd_tl,no_ssa_opt_record]),
153 = highest_opcode(DataDir, big, [r16]),
153 = highest_opcode(DataDir, big, [r17]),
153 = highest_opcode(DataDir, big, [r18]),
diff --git a/lib/compiler/test/core_fold_SUITE.erl b/lib/compiler/test/core_fold_SUITE.erl
index 30d145582a..68bc5c6e2f 100644
--- a/lib/compiler/test/core_fold_SUITE.erl
+++ b/lib/compiler/test/core_fold_SUITE.erl
@@ -373,7 +373,7 @@ unused_multiple_values_error(Config) when is_list(Config) ->
PrivDir = proplists:get_value(priv_dir, Config),
Dir = test_lib:get_data_dir(Config),
Core = filename:join(Dir, "unused_multiple_values_error"),
- Opts = [no_copt,clint,return,from_core,{outdir,PrivDir}
+ Opts = [no_copt,clint,ssalint,return,from_core,{outdir,PrivDir}
|test_lib:opt_opts(?MODULE)],
{error,[{unused_multiple_values_error,
[{none,core_lint,{return_mismatch,{hello,1}}}]}],
diff --git a/lib/compiler/test/guard_SUITE.erl b/lib/compiler/test/guard_SUITE.erl
index 99dc06b525..73a8dc0fda 100644
--- a/lib/compiler/test/guard_SUITE.erl
+++ b/lib/compiler/test/guard_SUITE.erl
@@ -1788,6 +1788,11 @@ t_tuple_size(Config) when is_list(Config) ->
14 = Mod:t({1,2,3,4}),
_ = code:delete(Mod),
_ = code:purge(Mod),
+
+ good_ip({1,2,3,4}),
+ good_ip({1,2,3,4,5,6,7,8}),
+ error = validate_ip({42,11}),
+ error = validate_ip(atom),
ok.
@@ -1805,6 +1810,16 @@ ludicrous_tuple_size(T)
when tuple_size(T) =:= 16#FFFFFFFFFFFFFFFF -> ok;
ludicrous_tuple_size(_) -> error.
+good_ip(IP) ->
+ IP = validate_ip(IP).
+
+validate_ip(Value) when is_tuple(Value) andalso
+ ((size(Value) =:= 4) orelse (size(Value) =:= 8)) ->
+ %% size/1 (converted to tuple_size) used more than once.
+ Value;
+validate_ip(_) ->
+ error.
+
%%
%% The binary_part/2,3 guard BIFs
%%
diff --git a/lib/compiler/test/inline_SUITE.erl b/lib/compiler/test/inline_SUITE.erl
index fdf2fe88b4..e03769012b 100644
--- a/lib/compiler/test/inline_SUITE.erl
+++ b/lib/compiler/test/inline_SUITE.erl
@@ -96,7 +96,8 @@ try_inline(Mod, Config) ->
%% Normal compilation.
io:format("Compiling: ~s\n", [Src]),
- {ok,Mod} = compile:file(Src, [{outdir,Out},report,bin_opt_info,clint]),
+ {ok,Mod} = compile:file(Src, [{outdir,Out},report,
+ bin_opt_info,clint,ssalint]),
ct:timetrap({minutes,10}),
NormalResult = rpc:call(Node, ?MODULE, load_and_call, [Out,Mod]),
@@ -104,7 +105,7 @@ try_inline(Mod, Config) ->
%% Inlining.
io:format("Compiling with old inliner: ~s\n", [Src]),
{ok,Mod} = compile:file(Src, [{outdir,Out},report,bin_opt_info,
- {inline,1000},clint]),
+ {inline,1000},clint,ssalint]),
%% Run inlined code.
ct:timetrap({minutes,10}),
@@ -117,7 +118,7 @@ try_inline(Mod, Config) ->
%% Inlining.
io:format("Compiling with new inliner: ~s\n", [Src]),
{ok,Mod} = compile:file(Src, [{outdir,Out},report,
- bin_opt_info,inline,clint]),
+ bin_opt_info,inline,clint,ssalint]),
%% Run inlined code.
ct:timetrap({minutes,10}),
@@ -351,7 +352,8 @@ otp_7223_2({a}) ->
coverage(Config) when is_list(Config) ->
Mod = bsdecode,
Src = filename:join(proplists:get_value(data_dir, Config), Mod),
- {ok,Mod,_} = compile:file(Src, [binary,report,{inline,0},clint]),
+ {ok,Mod,_} = compile:file(Src, [binary,report,{inline,0},
+ clint,ssalint]),
{ok,Mod,_} = compile:file(Src, [binary,report,{inline,20},
- verbose,clint]),
+ verbose,clint,ssalint]),
ok.
diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl
index 497cbad636..beae5eb618 100644
--- a/lib/compiler/test/misc_SUITE.erl
+++ b/lib/compiler/test/misc_SUITE.erl
@@ -171,7 +171,7 @@ silly_coverage(Config) when is_list(Config) ->
expect_error(fun() -> sys_core_dsetel:module(BadCoreErlang, []) end),
expect_error(fun() -> v3_kernel:module(BadCoreErlang, []) end),
- %% v3_codegen
+ %% beam_kernel_to_ssa
BadKernel = {k_mdef,[],?MODULE,
[{foo,0}],
[],
@@ -179,7 +179,31 @@ silly_coverage(Config) when is_list(Config) ->
{k,[],[],[]},
f,0,[],
seriously_bad_body}]},
- expect_error(fun() -> v3_codegen:module(BadKernel, []) end),
+ expect_error(fun() -> beam_kernel_to_ssa:module(BadKernel, []) end),
+
+ %% beam_ssa_lint
+ %% beam_ssa_recv
+ %% beam_ssa_pre_codegen
+ %% beam_ssa_opt
+ %% beam_ssa_codegen
+ BadSSA = {b_module,#{},a,b,c,
+ [{b_function,#{func_info=>{mod,foo,0}},args,bad_blocks,0}]},
+ expect_error(fun() -> beam_ssa_lint:module(BadSSA, []) end),
+ expect_error(fun() -> beam_ssa_recv:module(BadSSA, []) end),
+ expect_error(fun() -> beam_ssa_pre_codegen:module(BadSSA, []) end),
+ expect_error(fun() -> beam_ssa_opt:module(BadSSA, []) end),
+ expect_error(fun() -> beam_ssa_codegen:module(BadSSA, []) end),
+
+ %% beam_ssa_lint, beam_ssa_pp
+ {error,[{_,Errors}]} = beam_ssa_lint:module(bad_ssa_lint_input(), []),
+ _ = [io:put_chars(Mod:format_error(Reason)) ||
+ {Mod,Reason} <- Errors],
+
+ %% Cover printing of annotations in beam_ssa_pp
+ PPAnno = #{func_info=>{mod,foo,0},other_anno=>value,map_anno=>#{k=>v}},
+ PPBlocks = #{0=>{b_blk,#{},[],{b_ret,#{},{b_literal,42}}}},
+ PP = {b_function,PPAnno,[],PPBlocks,0},
+ io:put_chars(beam_ssa_pp:format_function(PP)),
%% beam_a
BeamAInput = {?MODULE,[{foo,0}],[],
@@ -268,6 +292,31 @@ silly_coverage(Config) when is_list(Config) ->
ok.
+bad_ssa_lint_input() ->
+ {b_module,#{},t,
+ [{foobar,1},{module_info,0},{module_info,1}],
+ [],
+ [{b_function,
+ #{func_info => {t,foobar,1},location => {"t.erl",4}},
+ [{b_var,0}],
+ #{0 => {b_blk,#{},[],{b_ret,#{},{b_var,'@undefined_var'}}}},
+ 3},
+ {b_function,
+ #{func_info => {t,module_info,0}},
+ [],
+ #{0 =>
+ {b_blk,#{},
+ [{b_set,#{},
+ {b_var,{'@ssa_ret',3}},
+ call,
+ [{b_remote,
+ {b_literal,erlang},
+ {b_literal,get_module_info},
+ 1},
+ {b_var,'@unknown_variable'}]}],
+ {b_ret,#{},{b_var,{'@ssa_ret',3}}}}},
+ 4}]}.
+
expect_error(Fun) ->
try Fun() of
Any ->
diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/no_5.erl b/lib/compiler/test/receive_SUITE_data/ref_opt/no_5.erl
new file mode 100644
index 0000000000..4fbde3a83d
--- /dev/null
+++ b/lib/compiler/test/receive_SUITE_data/ref_opt/no_5.erl
@@ -0,0 +1,38 @@
+-module(no_5).
+-compile([export_all,nowarn_export_all]).
+
+?MODULE() ->
+ ok.
+
+%% Nested receives were not handled properly.
+
+confusing_recv_mark(Pid) ->
+ Ref = make_ref(),
+ %% There would be a recv_mark here.
+ MRef = erlang:monitor(process, Pid),
+ receive
+ Ref ->
+ %% And a recv_set here.
+ receive
+ MRef -> gurka
+ end;
+ MRef ->
+ gaffel
+ end.
+
+%% The optimization could potentially be improved to
+%% handle matching of multiple refs, like this:
+
+proper_recv_mark(Pid) ->
+ %% Place the recv_mark before the creation of both refs.
+ Ref = make_ref(),
+ MRef = erlang:monitor(process, Pid),
+ %% Place the recv_set here.
+ receive
+ Ref ->
+ receive
+ MRef -> gurka
+ end;
+ MRef ->
+ gaffel
+ end.
diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.S b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.S
deleted file mode 100644
index fd14228135..0000000000
--- a/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.S
+++ /dev/null
@@ -1,71 +0,0 @@
-{module, yes_14}. %% version = 0
-
-{exports, [{f,2},{module_info,0},{module_info,1},{yes_14,0}]}.
-
-{attributes, []}.
-
-{labels, 12}.
-
-
-{function, yes_14, 0, 2}.
- {label,1}.
- {func_info,{atom,yes_14},{atom,yes_14},0}.
- {label,2}.
- {move,{atom,ok},{x,0}}.
- return.
-
-
-{function, f, 2, 4}.
- {label,3}.
- {func_info,{atom,yes_14},{atom,f},2}.
- {label,4}.
- {allocate_heap,2,3,2}.
- {move,{x,0},{y,1}}.
- {put_tuple,2,{y,0}}.
- {put,{atom,data}}.
- {put,{x,1}}.
- {call_ext,0,{extfunc,erlang,make_ref,0}}. % Ref in [x0]
- {test_heap,4,1}.
- {put_tuple,3,{x,1}}.
- {put,{atom,request}}.
- {put,{x,0}}.
- {put,{y,0}}.
- {move,{x,0},{y,0}}. % Ref in [x0,y0]
- {move,{y,1},{x,0}}. % Ref in [y0]
- {kill,{y,1}}.
- send.
- {move,{y,0},{x,0}}. % Ref in [x0,y0]
- {move,{x,0},{y,1}}. % Ref in [x0,y0,y1]
- {label,5}.
- {loop_rec,{f,7},{x,0}}. % Ref in [y0,y1]
- {test,is_tuple,{f,6},[{x,0}]}.
- {test,test_arity,{f,6},[{x,0},2]}.
- {get_tuple_element,{x,0},0,{x,1}}.
- {get_tuple_element,{x,0},1,{x,2}}.
- {test,is_eq_exact,{f,6},[{x,1},{atom,reply}]}.
- {test,is_eq_exact,{f,6},[{x,2},{y,1}]}.
- remove_message.
- {move,{atom,ok},{x,0}}.
- {deallocate,2}.
- return.
- {label,6}.
- {loop_rec_end,{f,5}}.
- {label,7}.
- {wait,{f,5}}.
-
-
-{function, module_info, 0, 9}.
- {label,8}.
- {func_info,{atom,yes_14},{atom,module_info},0}.
- {label,9}.
- {move,{atom,yes_14},{x,0}}.
- {call_ext_only,1,{extfunc,erlang,get_module_info,1}}.
-
-
-{function, module_info, 1, 11}.
- {label,10}.
- {func_info,{atom,yes_14},{atom,module_info},1}.
- {label,11}.
- {move,{x,0},{x,1}}.
- {move,{atom,yes_14},{x,0}}.
- {call_ext_only,2,{extfunc,erlang,get_module_info,2}}.
diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.erl b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.erl
new file mode 100644
index 0000000000..aa47c02af9
--- /dev/null
+++ b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_14.erl
@@ -0,0 +1,27 @@
+-module(yes_14).
+-compile(export_all).
+
+?MODULE() ->
+ ok.
+
+do_call(Process, Request) ->
+ Mref = erlang:monitor(process, Process),
+ Process ! Request,
+ Local = case node(Process) of
+ Node when Node =:= node() -> true;
+ _Node -> false
+ end,
+ id(Local),
+ receive
+ {X,Y,Z} when Mref =/= X, Z =:= 42, Mref =:= Y ->
+ error;
+ {X,Y,_} when Mref =/= X, Mref =:= Y ->
+ error;
+ {Mref, Reply} ->
+ erlang:demonitor(Mref, [flush]),
+ {ok, Reply};
+ {'DOWN', Mref, _, _, _} ->
+ error
+ end.
+
+id(I) -> I.
diff --git a/lib/compiler/test/test_lib.erl b/lib/compiler/test/test_lib.erl
index 8954a9f5fb..c6baa611ec 100644
--- a/lib/compiler/test/test_lib.erl
+++ b/lib/compiler/test/test_lib.erl
@@ -72,11 +72,9 @@ opt_opts(Mod) ->
{options,Opts} = lists:keyfind(options, 1, Comp),
lists:filter(fun(no_copt) -> true;
(no_postopt) -> true;
- (no_float_opt) -> true;
- (no_new_funs) -> true;
- (no_new_binaries) -> true;
- (no_new_apply) -> true;
- (no_gc_bifs) -> true;
+ (no_ssa_opt) -> true;
+ (no_recv_opt) -> true;
+ (no_ssa_float) -> true;
(no_stack_trimming) -> true;
(debug_info) -> true;
(inline) -> true;