aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compiler/src')
-rw-r--r--lib/compiler/src/beam_block.erl89
-rw-r--r--lib/compiler/src/beam_record.erl126
-rw-r--r--lib/compiler/src/beam_utils.erl268
-rw-r--r--lib/compiler/src/v3_codegen.erl94
4 files changed, 457 insertions, 120 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 79c4b651db..fe1ce6f60b 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -37,13 +37,14 @@ function({function,Name,Arity,CLabel,Is0}) ->
%% Collect basic blocks and optimize them.
Is1 = blockify(Is0),
Is2 = embed_lines(Is1),
- Is3 = move_allocates(Is2),
- Is4 = beam_utils:live_opt(Is3),
- Is5 = opt_blocks(Is4),
- Is6 = beam_utils:delete_live_annos(Is5),
-
- %% Done.
- {function,Name,Arity,CLabel,Is6}
+ Is3 = beam_utils:anno_defs(Is2),
+ Is4 = move_allocates(Is3),
+ Is5 = beam_utils:live_opt(Is4),
+ Is6 = opt_blocks(Is5),
+ Is = beam_utils:delete_live_annos(Is6),
+
+ %% Done.
+ {function,Name,Arity,CLabel,Is}
catch
Class:Error:Stack ->
io:fwrite("Function: ~w/~w\n", [Name,Arity]),
@@ -172,7 +173,7 @@ find_fixpoint(OptFun, Is0) ->
%% safe to assume that if x(N) is initialized, then all lower-numbered
%% x registers are also initialized.
%%
-%% For example, in general it is not safe to transform the following
+%% For example, we must be careful when transforming the following
%% instructions:
%%
%% get_tuple_element x(0) Element => x(1)
@@ -184,13 +185,9 @@ find_fixpoint(OptFun, Is0) ->
%% get_tuple_element x(0) Element => x(1)
%%
%% The transformation is safe if and only if x(1) has been
-%% initialized previously. Unfortunately, beam_reorder may have moved
-%% a get_tuple_element instruction so that x(1) is not always
-%% initialized when this code is reached. To find whether or not x(1)
-%% is initialized, we would need to analyze all code preceding these
-%% two instructions (across branches). Since we currently don't have
-%% any practical mechanism for doing that, we will have to
-%% conservatively assume that the transformation is unsafe.
+%% initialized previously. We will use the annotations added by
+%% beam_utils:anno_defs/1 to determine whether x(a) has been
+%% initialized.
move_allocates([{block,Bl0}|Is]) ->
Bl = move_allocates_1(reverse(Bl0), []),
@@ -199,15 +196,20 @@ move_allocates([I|Is]) ->
[I|move_allocates(Is)];
move_allocates([]) -> [].
+move_allocates_1([{'%def',_}|Is], Acc) ->
+ move_allocates_1(Is, Acc);
move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info}}|Acc]=Acc0) ->
- case {alloc_may_pass(I),alloc_live_regs(I, Live0)} of
- {false,_} ->
- move_allocates_1(Is, [I|Acc0]);
- {true,not_possible} ->
- move_allocates_1(Is, [I|Acc0]);
- {true,Live} when is_integer(Live) ->
- A = {set,[],[],{alloc,Live,Info}},
- move_allocates_1(Is, [A,I|Acc])
+ case alloc_may_pass(I) of
+ false ->
+ move_allocates_1(Is, [I|Acc0]);
+ true ->
+ case alloc_live_regs(I, Is, Live0) of
+ not_possible ->
+ move_allocates_1(Is, [I|Acc0]);
+ Live when is_integer(Live) ->
+ A = {set,[],[],{alloc,Live,Info}},
+ move_allocates_1(Is, [A,I|Acc])
+ end
end;
move_allocates_1([I|Is], Acc) ->
move_allocates_1(Is, [I|Acc]);
@@ -471,16 +473,34 @@ count_ones(Bits, Acc) ->
%% Calculate the new number of live registers when we move an allocate
%% instruction upwards, passing a 'set' instruction.
-alloc_live_regs({set,Ds,Ss,_}, Regs0) ->
+alloc_live_regs({set,Ds,Ss,_}, Is, Regs0) ->
Rset = x_live(Ss, x_dead(Ds, (1 bsl Regs0)-1)),
- live_regs(0, Rset).
+ Live = live_regs(0, Rset),
+ case ensure_contiguous(Rset, Live) of
+ not_possible ->
+ %% Liveness information (looking forward in the
+ %% instruction stream) can't prove that moving this
+ %% allocation instruction is safe. Now use the annotation
+ %% of defined registers at the beginning of the current
+ %% block to see whether moving would be safe.
+ Def0 = defined_regs(Is, 0),
+ Def = Def0 band ((1 bsl Live) - 1),
+ ensure_contiguous(Rset bor Def, Live);
+ Live ->
+ %% Safe based on liveness information.
+ Live
+ end.
live_regs(N, 0) ->
N;
-live_regs(N, Regs) when Regs band 1 =:= 1 ->
- live_regs(N+1, Regs bsr 1);
-live_regs(_, _) ->
- not_possible.
+live_regs(N, Regs) ->
+ live_regs(N+1, Regs bsr 1).
+
+ensure_contiguous(Regs, Live) ->
+ case (1 bsl Live) - 1 of
+ Regs -> Live;
+ _ -> not_possible
+ end.
x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N)));
x_dead([_|Rs], Regs) -> x_dead(Rs, Regs);
@@ -489,3 +509,14 @@ x_dead([], Regs) -> Regs.
x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N));
x_live([_|Rs], Regs) -> x_live(Rs, Regs);
x_live([], Regs) -> Regs.
+
+%% defined_regs(ReversedInstructions) -> RegBitmap.
+%% Given a reversed instruction stream, determine the
+%% the registers that are defined.
+
+defined_regs([{'%def',Def}|_], Regs) ->
+ Def bor Regs;
+defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) ->
+ x_live(Ds, Regs bor ((1 bsl Live) - 1));
+defined_regs([{set,Ds,_,_}|Is], Regs) ->
+ defined_regs(Is, x_live(Ds, Regs)).
diff --git a/lib/compiler/src/beam_record.erl b/lib/compiler/src/beam_record.erl
index e4775ca6a4..db1053e48c 100644
--- a/lib/compiler/src/beam_record.erl
+++ b/lib/compiler/src/beam_record.erl
@@ -15,19 +15,12 @@
%%
%% %CopyrightEnd%
%%
-%% File: beam_record.erl
-%% Author: Björn-Egil Dahlberg
-%% Created: 2014-09-03
-%%
-
--module(beam_record).
--export([module/2]).
%% Rewrite the instruction stream on tagged tuple tests.
-%% Tagged tuples means a tuple of any arity with an atom as its first element.
-%% Typically records, ok-tuples and error-tuples.
-%%
-%% from:
+%% Tagged tuples means a tuple of any arity with an atom as its
+%% first element, such as records and error tuples.
+%%
+%% From:
%% ...
%% {test,is_tuple,Fail,[Src]}.
%% {test,test_arity,Fail,[Src,Sz]}.
@@ -36,13 +29,16 @@
%% ...
%% {test,is_eq_exact,Fail,[Dst,Atom]}.
%% ...
-%% to:
+%% To:
%% ...
%% {test,is_tagged_tuple,Fail,[Src,Sz,Atom]}.
%% ...
+%%
+-module(beam_record).
+-export([module/2]).
--import(lists, [reverse/1]).
+-import(lists, [reverse/1,reverse/2]).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
@@ -51,55 +47,85 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
Fs = [function(F) || F <- Fs0],
{ok,{Mod,Exp,Attr,Fs,Lc}}.
-function({function,Name,Arity,CLabel,Is}) ->
+function({function,Name,Arity,CLabel,Is0}) ->
try
- Idx = beam_utils:index_labels(Is),
- {function,Name,Arity,CLabel,rewrite(Is,Idx)}
+ Is1 = beam_utils:anno_defs(Is0),
+ Idx = beam_utils:index_labels(Is1),
+ Is = rewrite(reverse(Is1), Idx),
+ {function,Name,Arity,CLabel,Is}
catch
Class:Error:Stack ->
io:fwrite("Function: ~w/~w\n", [Name,Arity]),
erlang:raise(Class, Error, Stack)
end.
-rewrite(Is,Idx) ->
- rewrite(Is,Idx,[]).
+rewrite(Is, Idx) ->
+ rewrite(Is, Idx, 0, []).
-rewrite([{test,is_tuple,Fail,[Src]}=I1,
- {test,test_arity,Fail,[Src,N]}=I2|Is],Idx,Acc) ->
- case is_tagged_tuple(Is,Fail,Src,Idx) of
+rewrite([{test,test_arity,Fail,[Src,N]}=TA,
+ {test,is_tuple,Fail,[Src]}=TT|Is], Idx, Def, Acc0) ->
+ case is_tagged_tuple(Acc0, Def, Fail, Src, Idx) of
no ->
- rewrite(Is,Idx,[I2,I1|Acc]);
- {Atom,[{block,[]}|Is1]} ->
- rewrite(Is1,Idx,[{test,is_tagged_tuple,Fail,[Src,N,Atom]}|Acc]);
- {Atom,Is1} ->
- rewrite(Is1,Idx,[{test,is_tagged_tuple,Fail,[Src,N,Atom]}|Acc])
+ rewrite(Is, Idx, 0, [TT,TA|Acc0]);
+ {yes,Atom,Acc} ->
+ I = {test,is_tagged_tuple,Fail,[Src,N,Atom]},
+ rewrite(Is, Idx, Def, [I|Acc])
end;
-rewrite([I|Is],Idx,Acc) ->
- rewrite(Is,Idx,[I|Acc]);
-rewrite([],_,Acc) -> reverse(Acc).
-
-is_tagged_tuple([{block,[{set,[Dst],[Src],{get_tuple_element,0}}=B|Bs]},
- {test,is_eq_exact,Fail,[Dst,{atom,_}=Atom]}|Is],Fail,Src,Idx) ->
+rewrite([{block,[{'%def',Def}|Bl]}|Is], Idx, _Def, Acc) ->
+ rewrite(Is, Idx, Def, [{block,Bl}|Acc]);
+rewrite([{label,L}=I|Is], Idx0, Def, Acc) ->
+ Idx = beam_utils:index_label(L, Acc, Idx0),
+ rewrite(Is, Idx, Def, [I|Acc]);
+rewrite([I|Is], Idx, Def, Acc) ->
+ rewrite(Is, Idx, Def, [I|Acc]);
+rewrite([], _, _, Acc) -> Acc.
- %% if Dst is killed in the instruction stream and at fail label,
- %% we can safely remove get_tuple_element.
- %%
- %% if Dst is not killed in the stream, we cannot remove get_tuple_element
- %% since it is referenced.
-
- case is_killed(Dst,Is,Fail,Idx) of
- true -> {Atom,[{block,Bs}|Is]};
- false -> {Atom,[{block,[B|Bs]}|Is]}
+is_tagged_tuple([{block,Bl},
+ {test,is_eq_exact,Fail,[Dst,{atom,_}=Atom]}|Is],
+ Def, Fail, Src, Idx) ->
+ case is_tagged_tuple_1(Bl, Is, Fail, Src, Dst, Idx, Def, []) of
+ no ->
+ no;
+ {yes,[]} ->
+ {yes,Atom,Is};
+ {yes,[_|_]=Block} ->
+ {yes,Atom,[{block,Block}|Is]}
end;
-is_tagged_tuple([{block,[{set,_,_,_}=B|Bs]},
- {test,is_eq_exact,_,_}=I|Is],Fail,Src,Idx) ->
- case is_tagged_tuple([{block,Bs},I|Is],Fail,Src,Idx) of
- {Atom,[{block,Bsr}|Isr]} -> {Atom,[{block,[B|Bsr]}|Isr]};
- no -> no
+is_tagged_tuple(_, _, _, _, _) ->
+ no.
+
+is_tagged_tuple_1([{set,[Dst],[Src],{get_tuple_element,0}}=I|Bl],
+ Is, Fail, Src, Dst, Idx, Def, Acc) ->
+ %% Check usage of Dst to find out whether the get_tuple_element
+ %% is needed.
+ case usage(Dst, Is, Fail, Idx) of
+ killed ->
+ %% Safe to remove the get_tuple_element instruction.
+ {yes,reverse(Acc, Bl)};
+ used ->
+ %% Actively used. Must keep instruction.
+ {yes,reverse(Acc, [I|Bl])};
+ not_used ->
+ %% Not actually used (but must be initialized).
+ case is_defined(Dst, Def) of
+ false ->
+ %% Dst must be initialized, but the
+ %% actual value does not matter.
+ Kill = {set,[Dst],[nil],move},
+ {yes,reverse(Acc, [Kill|Bl])};
+ true ->
+ %% The register is previously initialized.
+ %% We can remove the instruction.
+ {yes,reverse(Acc, Bl)}
+ end
end;
-is_tagged_tuple(_Is,_Fail,_Src,_Idx) ->
+is_tagged_tuple_1([I|Bl], Is, Fail, Src, Dst, Idx, Def, Acc) ->
+ is_tagged_tuple_1(Bl, Is, Fail, Src, Dst, Idx, Def, [I|Acc]);
+is_tagged_tuple_1(_, _, _, _, _, _, _, _) ->
no.
-is_killed(Dst,Is,{_,Lbl},Idx) ->
- beam_utils:is_killed(Dst,Is,Idx) andalso
- beam_utils:is_killed_at(Dst,Lbl,Idx).
+usage(Dst, Is, Fail, Idx) ->
+ beam_utils:usage(Dst, [{test,is_number,Fail,[nil]}|Is], Idx).
+
+is_defined({x,X}, Def) ->
+ (Def bsr X) band 1 =:= 1.
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index 941ae95110..e61d6a43b4 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -22,11 +22,13 @@
-module(beam_utils).
-export([is_killed_block/2,is_killed/3,is_killed_at/3,
- is_not_used/3,
+ is_not_used/3,usage/3,
empty_label_index/0,index_label/3,index_labels/1,replace_labels/4,
code_at/2,bif_to_test/3,is_pure_test/1,
live_opt/1,delete_live_annos/1,combine_heap_needs/2,
- split_even/1]).
+ anno_defs/1,
+ split_even/1
+ ]).
-export_type([code_index/0,module_code/0,instruction/0]).
@@ -60,6 +62,23 @@
{lbl :: code_index(), %Label to code index.
res :: result_cache()}). %Result cache for each label.
+%% usage(Register, [Instruction], State) -> killed|not_used|used.
+%% Determine the usage of Register in the instruction sequence.
+%% The return value is one of:
+%%
+%% killed - The register is not used in any way.
+%% not_used - The register is referenced only by an allocating instruction
+%% (the actual value does not matter).
+%% used - The register is used (its value do matter).
+
+-spec usage(beam_asm:reg(), [instruction()], code_index()) ->
+ 'killed' | 'not_used' | 'used'.
+
+usage(R, Is, D) ->
+ St = #live{lbl=D,res=gb_trees:empty()},
+ {Usage,_} = check_liveness(R, Is, St),
+ Usage.
+
%% is_killed_block(Register, [Instruction]) -> true|false
%% Determine whether a register is killed by the instruction sequence inside
@@ -293,6 +312,23 @@ combine_heap_needs(H1, H2) ->
combine_alloc_lists([H1,H2]).
+%% anno_defs(Instructions) -> Instructions'
+%% Add {'%def',RegisterBitmap} annotations to the beginning of
+%% each block. Iff bit X is set in the the bitmap, it means
+%% that {x,X} is defined when the block is entered.
+
+-spec anno_defs([instruction()]) -> [instruction()].
+
+anno_defs(Is0) ->
+ {Bef,[Fi|Is1]} =
+ splitwith(fun({func_info,_,_,_}) -> false;
+ (_) -> true
+ end, Is0),
+ {func_info,_,_,Arity} = Fi,
+ Regs = init_def_regs(Arity),
+ Is = defs(Is1, Regs, #{}),
+ Bef ++ [Fi|Is].
+
%% split_even/1
%% [1,2,3,4,5,6] -> {[1,3,5],[2,4,6]}
@@ -300,7 +336,6 @@ combine_heap_needs(H1, H2) ->
split_even(Rs) -> split_even(Rs, [], []).
-
%%%
%%% Local functions.
%%%
@@ -318,6 +353,10 @@ check_liveness(R, [{block,Blk}|Is], St0) ->
case check_liveness_block(R, Blk, St0) of
{transparent,St1} ->
check_liveness(R, Is, St1);
+ {alloc_used,St1} ->
+ %% Used by an allocating instruction, but value not referenced.
+ %% Must check the rest of the instructions.
+ not_used(check_liveness(R, Is, St1));
{Other,_}=Res when is_atom(Other) ->
Res
end;
@@ -376,9 +415,16 @@ check_liveness(R, [{bs_init,_,_,none,Ss,Dst}|Is], St) ->
check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) ->
case R of
{x,X} ->
- case X < Live orelse member(R, Ss) of
- true -> {used,St};
- false -> {killed,St}
+ case member(R, Ss) of
+ true ->
+ {used,St};
+ false ->
+ if
+ X < Live ->
+ not_used(check_liveness(R, Is, St));
+ true ->
+ {killed,St}
+ end
end;
{y,_} ->
case member(R, Ss) of
@@ -588,7 +634,7 @@ check_liveness_ret(R, R, St) -> {used,St};
check_liveness_ret(_, _, St) -> {killed,St}.
%% check_liveness_block(Reg, [Instruction], State) ->
-%% {killed | not_used | used | transparent,State'}
+%% {killed | not_used | used | alloc_used | transparent,State'}
%% Finds out how Reg is used in the instruction sequence inside a block.
%% Returns one of:
%% killed - Reg is assigned a new value or killed by an
@@ -596,6 +642,7 @@ check_liveness_ret(_, _, St) -> {killed,St}.
%% not_used - The value is not used, but the register is referenced
%% e.g. by an allocation instruction
%% transparent - Reg is neither used nor killed
+%% alloc_used - Used only in an allocate instruction
%% used - Reg is explicitly used by an instruction
%%
%% '%live' annotations are not allowed.
@@ -609,7 +656,7 @@ check_liveness_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], St0) ->
true ->
case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
{killed,St} -> {not_used,St};
- {transparent,St} -> {not_used,St};
+ {transparent,St} -> {alloc_used,St};
{_,_}=Res -> Res
end
end;
@@ -949,3 +996,208 @@ split_even([], Ss, Ds) ->
{reverse(Ss),reverse(Ds)};
split_even([S,D|Rs], Ss, Ds) ->
split_even(Rs, [S|Ss], [D|Ds]).
+
+%%%
+%%% Add annotations for defined registers.
+%%%
+%%% This analysis is done by scanning the instructions in
+%%% execution order.
+%%%
+
+defs([{apply,_}=I|Is], _Regs, D) ->
+ [I|defs(Is, 1, D)];
+defs([{bif,_,{f,Fail},_Src,Dst}=I|Is], Regs0, D) ->
+ Regs = def_regs([Dst], Regs0),
+ [I|defs(Is, Regs, update_regs(Fail, Regs0, D))];
+defs([{block,Block0}|Is], Regs0, D0) ->
+ {Block,Regs,D} = defs_list(Block0, Regs0, D0),
+ [{block,[{'%def',Regs0}|Block]}|defs(Is, Regs, D)];
+defs([{bs_init,{f,L},_,_,_,Dst}=I|Is], Regs0, D) ->
+ Regs = def_regs([Dst], Regs0),
+ [I|defs(Is, Regs, update_regs(L, Regs, D))];
+defs([{bs_put,{f,L},_,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, update_regs(L, Regs, D))];
+defs([build_stacktrace=I|Is], _Regs, D) ->
+ [I|defs(Is, 1, D)];
+defs([{call,_,_}=I|Is], _Regs, D) ->
+ [I|defs(Is, 1, D)];
+defs([{call_ext,_,{extfunc,M,F,A}}=I|Is], _Regs, D) ->
+ case erl_bifs:is_exit_bif(M, F, A) of
+ false ->
+ [I|defs(Is, 1, D)];
+ true ->
+ [I|defs_unreachable(Is, D)]
+ end;
+defs([{call_ext,_,_}=I|Is], _Regs, D) ->
+ [I|defs(Is, 1, D)];
+defs([{call_fun,_}=I|Is], _Regs, D) ->
+ [I|defs(Is, 1, D)];
+defs([{'catch',_,{f,L}}=I|Is], Regs, D) ->
+ RegsAtLabel = init_def_regs(1),
+ [I|defs(Is, Regs, update_regs(L, RegsAtLabel, D))];
+defs([{catch_end,_}=I|Is], _Regs, D) ->
+ Regs = init_def_regs(1),
+ [I|defs(Is, Regs, D)];
+defs([{gc_bif,_,{f,Fail},Live,_Src,Dst}=I|Is], Regs0, D) ->
+ true = all_defined(Live, Regs0), %Assertion.
+ Regs = def_regs([Dst], init_def_regs(Live)),
+ [I|defs(Is, Regs, update_regs(Fail, Regs0, D))];
+defs([{get_map_elements,{f,L},_Src,{list,DstList}}=I|Is], Regs0, D) ->
+ {_,Ds} = beam_utils:split_even(DstList),
+ Regs = def_regs(Ds, Regs0),
+ [I|defs(Is, Regs, update_regs(L, Regs0, D))];
+defs([{get_tuple_element,_,_,Dst}=I|Is], Regs0, D) ->
+ Regs = def_regs([Dst], Regs0),
+ [I|defs(Is, Regs, D)];
+defs([{jump,{f,L}}=I|Is], Regs, D) ->
+ [I|defs_unreachable(Is, update_regs(L, Regs, D))];
+defs([{label,L}=I|Is], Regs0, D) ->
+ case D of
+ #{L:=Regs1} ->
+ Regs = Regs0 band Regs1,
+ [I|defs(Is, Regs, D)];
+ #{} ->
+ [I|defs(Is, Regs0, D)]
+ end;
+defs([{loop_rec,{f,L},{x,0}}=I|Is], _Regs, D0) ->
+ RegsAtLabel = init_def_regs(0),
+ D = update_regs(L, RegsAtLabel, D0),
+ [I|defs(Is, init_def_regs(1), D)];
+defs([{loop_rec_end,_}=I|Is], _Regs, D) ->
+ [I|defs(Is, 0, D)];
+defs([{make_fun2,_,_,_,_}=I|Is], _Regs, D) ->
+ [I|defs(Is, 1, D)];
+defs([{move,_,Dst}=I|Is], Regs0, D) ->
+ Regs = def_regs([Dst], Regs0),
+ [I|defs(Is, Regs, D)];
+defs([{put_map,{f,Fail},_,_,Dst,_,_}=I|Is], Regs0, D) ->
+ Regs = def_regs([Dst], Regs0),
+ [I|defs(Is, Regs, update_regs(Fail, Regs0, D))];
+defs([return=I|Is], _Regs, D) ->
+ [I|defs_unreachable(Is, D)];
+defs([{select,_,_Src,Fail,List}=I|Is], Regs, D0) ->
+ D = update_list([Fail|List], Regs, D0),
+ [I|defs_unreachable(Is, D)];
+defs([{test,_,{f,L},_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, update_regs(L, Regs, D))];
+defs([{test,_,{f,L},Live,_,Dst}=I|Is], Regs0, D) ->
+ true = all_defined(Live, Regs0), %Assertion.
+ Regs = def_regs([Dst], init_def_regs(Live)),
+ [I|defs(Is, Regs, update_regs(L, Regs0, D))];
+defs([{'try',_,{f,L}}=I|Is], Regs, D) ->
+ RegsAtLabel = init_def_regs(3),
+ [I|defs(Is, Regs, update_regs(L, RegsAtLabel, D))];
+defs([{try_case,_}=I|Is], _Regs, D) ->
+ [I|defs(Is, init_def_regs(3), D)];
+defs([{wait,_}=I|Is], _Regs, D) ->
+ [I|defs_unreachable(Is, D)];
+defs([{wait_timeout,_,_}=I|Is], _Regs, D) ->
+ [I|defs(Is, 0, D)];
+
+%% Exceptions.
+defs([{badmatch,_}=I|Is], _Regs, D) ->
+ [I|defs_unreachable(Is, D)];
+defs([{case_end,_}=I|Is], _Regs, D) ->
+ [I|defs_unreachable(Is, D)];
+defs([if_end=I|Is], _Regs, D) ->
+ [I|defs_unreachable(Is, D)];
+defs([{try_case_end,_}=I|Is], _Regs, D) ->
+ [I|defs_unreachable(Is, D)];
+
+%% Neutral instructions
+defs([{bs_context_to_binary,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([{bs_restore2,_,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([{bs_save2,_,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([{deallocate,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([{kill,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([{line,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([{recv_mark,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([{recv_set,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([timeout=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([{trim,_,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([{try_end,_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([{'%',_}=I|Is], Regs, D) ->
+ [I|defs(Is, Regs, D)];
+defs([], _, _) -> [].
+
+defs_unreachable([{label,L}=I|Is], D) ->
+ case D of
+ #{L:=Regs} ->
+ [I|defs(Is, Regs, D)];
+ #{} ->
+ defs_unreachable(Is, D)
+ end;
+defs_unreachable([_|Is], D) ->
+ defs_unreachable(Is, D);
+defs_unreachable([], _D) -> [].
+
+defs_list(Is, Regs, D) ->
+ defs_list(Is, Regs, D, []).
+
+defs_list([{set,Ds,_,{alloc,Live,Info}}=I|Is], Regs0, D0, Acc) ->
+ true = all_defined(Live, Regs0), %Assertion.
+ D = case Info of
+ {gc_bif,_,{f,Fail}} ->
+ update_regs(Fail, Regs0, D0);
+ {put_map,_,{f,Fail}} ->
+ update_regs(Fail, Regs0, D0);
+ _ ->
+ D0
+ end,
+ Regs = def_regs(Ds, init_def_regs(Live)),
+ defs_list(Is, Regs, D, [I|Acc]);
+defs_list([{set,Ds,_,Info}=I|Is], Regs0, D0, Acc) ->
+ D = case Info of
+ {bif,_,{f,Fail}} ->
+ update_regs(Fail, Regs0, D0);
+ {try_catch,'catch',{f,Fail}} ->
+ update_regs(Fail, init_def_regs(1), D0);
+ {try_catch,'try',{f,Fail}} ->
+ update_regs(Fail, init_def_regs(3), D0);
+ _ ->
+ D0
+ end,
+ Regs = def_regs(Ds, Regs0),
+ defs_list(Is, Regs, D, [I|Acc]);
+defs_list([], Regs, D, Acc) ->
+ {reverse(Acc),Regs,D}.
+
+init_def_regs(Arity) ->
+ (1 bsl Arity) - 1.
+
+def_regs([{x,X}|T], Regs) ->
+ def_regs(T, Regs bor (1 bsl X));
+def_regs([_|T], Regs) ->
+ def_regs(T, Regs);
+def_regs([], Regs) -> Regs.
+
+update_list([{f,L}|T], Regs, D0) ->
+ D = update_regs(L, Regs, D0),
+ update_list(T, Regs, D);
+update_list([_|T], Regs, D) ->
+ update_list(T, Regs, D);
+update_list([], _Regs, D) -> D.
+
+update_regs(L, Regs0, D) ->
+ case D of
+ #{L:=Regs1} ->
+ Regs = Regs0 band Regs1,
+ D#{L:=Regs};
+ #{} ->
+ D#{L=>Regs0}
+ end.
+
+all_defined(Live, Regs) ->
+ All = (1 bsl Live) - 1,
+ Regs band All =:= All.
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index 8189420c1c..62c6c54b9f 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -446,7 +446,7 @@ cg_fun(Les, Hvs, Vdb, AtomMod, NameArity, Anno, St0) ->
put_reg(V, Reg)
end, [], Hvs),
stk=[]}, 0, Vdb),
- {B,_Aft,St} = cg_list(Les, 0, Vdb, Bef,
+ {B,_Aft,St} = cg_list(Les, Vdb, Bef,
St3#cg{bfail=0,
ultimate_failure=UltimateMatchFail,
is_top_block=true}),
@@ -498,19 +498,19 @@ cg(#cg_need_heap{h=H}, _Vdb, Bef, St) ->
%% cg_list([Kexpr], FirstI, Vdb, StackReg, St) -> {[Ainstr],StackReg,St}.
-cg_list(Kes, I, Vdb, Bef, St0) ->
+cg_list(Kes, Vdb, Bef, St0) ->
{Keis,{Aft,St1}} =
flatmapfoldl(fun (Ke, {Inta,Sta}) ->
{Keis,Intb,Stb} = cg(Ke, Vdb, Inta, Sta),
{Keis,{Intb,Stb}}
- end, {Bef,St0}, need_heap(Kes, I)),
+ end, {Bef,St0}, need_heap(Kes)),
{Keis,Aft,St1}.
%% need_heap([Lkexpr], I, St) -> [Lkexpr].
%% Insert need_heap instructions in Kexpr list. Try to be smart and
%% collect them together as much as possible.
-need_heap(Kes0, _I) ->
+need_heap(Kes0) ->
{Kes,H} = need_heap_0(reverse(Kes0), 0, []),
%% Prepend need_heap if necessary.
@@ -741,26 +741,55 @@ block_cg(Es, Le, _Vdb, Bef, St) ->
block_cg(Es, Le, Bef, St).
block_cg(Es, Le, Bef, #cg{is_top_block=false}=St) ->
- cg_block(Es, Le#l.i, Le#l.vdb, Bef, St);
-block_cg(Es, Le, Bef, St0) ->
- {Is0,Aft,St} = cg_block(Es, Le#l.i, Le#l.vdb, Bef,
- St0#cg{is_top_block=false,need_frame=false}),
- Is = top_level_block(Is0, Aft, max_reg(Bef#sr.reg), St),
- {Is,Aft,St#cg{is_top_block=true}}.
-
-cg_block([], _I, _Vdb, Bef, St0) ->
+ cg_block(Es, Le#l.vdb, Bef, St);
+block_cg(Es, Le, Bef, #cg{is_top_block=true}=St0) ->
+ %% No stack frame has been established yet. Do we need one?
+ case need_stackframe(Es) of
+ true ->
+ %% We need a stack frame. Generate the code and add the
+ %% code for creating and deallocating the stack frame.
+ {Is0,Aft,St} = cg_block(Es, Le#l.vdb, Bef,
+ St0#cg{is_top_block=false,need_frame=false}),
+ Is = top_level_block(Is0, Aft, max_reg(Bef#sr.reg), St),
+ {Is,Aft,St#cg{is_top_block=true}};
+ false ->
+ %% This sequence of instructions ending in a #k_match{} (a
+ %% 'case' or 'if') in the Erlang code does not need a
+ %% stack frame yet. Delay the creation (if a stack frame
+ %% is needed at all, it will be created inside the
+ %% #k_match{}).
+ cg_list(Es, Le#l.vdb, Bef, St0)
+ end.
+
+%% need_stackframe([Kexpr]) -> true|false.
+%% Does this list of instructions need a stack frame?
+%%
+%% A sequence of instructions that don't clobber the X registers
+%% followed by a single #k_match{} doesn't need a stack frame.
+
+need_stackframe([H|T]) ->
+ case H of
+ #k_bif{op=#k_internal{}} -> true;
+ #k_put{arg=#k_binary{}} -> true;
+ #k_bif{} -> need_stackframe(T);
+ #k_put{} -> need_stackframe(T);
+ #k_guard_match{} -> need_stackframe(T);
+ #k_match{} when T =:= [] -> false;
+ _ -> true
+ end;
+need_stackframe([]) -> false.
+
+cg_block([], _Vdb, Bef, St0) ->
{[],Bef,St0};
-cg_block(Kes0, I, Vdb, Bef, St0) ->
+cg_block(Kes0, Vdb, Bef, St0) ->
{Kes2,Int1,St1} =
case basic_block(Kes0) of
{Kes1,LastI,Args,Rest} ->
- Ke = hd(Kes1),
- #l{i=Fb} = get_kanno(Ke),
- cg_basic_block(Kes1, Fb, LastI, Args, Vdb, Bef, St0);
+ cg_basic_block(Kes1, LastI, Args, Vdb, Bef, St0);
{Kes1,Rest} ->
- cg_list(Kes1, I, Vdb, Bef, St0)
+ cg_list(Kes1, Vdb, Bef, St0)
end,
- {Kes3,Int2,St2} = cg_block(Rest, I, Vdb, Int1, St1),
+ {Kes3,Int2,St2} = cg_block(Rest, Vdb, Int1, St1),
{Kes2 ++ Kes3,Int2,St2}.
basic_block(Kes) -> basic_block(Kes, []).
@@ -838,12 +867,12 @@ func_vars(_) -> [].
%% save_carefully/4 during code generation to only save the variables
%% that can be saved without growing the stack frame.
-cg_basic_block(Kes, Fb, Lf, As, Vdb, Bef, St0) ->
+cg_basic_block(Kes, Lf, As, Vdb, Bef, St0) ->
Int0 = reserve_arg_regs(As, Bef),
Int = extend_stack(Int0, Lf, Lf+1, Vdb),
{Keis,{Aft,St1}} =
flatmapfoldl(fun(Ke, St) -> cg_basic_block(Ke, St, Lf, Vdb) end,
- {Int,St0}, need_heap(Kes, Fb)),
+ {Int,St0}, need_heap(Kes)),
{Keis,Aft,St1}.
cg_basic_block(#cg_need_heap{}=Ke, {Bef,St0}, _Lf, Vdb) ->
@@ -1478,8 +1507,8 @@ guard_clause_cg(#k_guard_clause{anno=#l{vdb=Vdb},guard=G,body=B}, Fail, Bef, St0
%% the correct exit point. Primops and tests all go to the next
%% instruction on success or jump to a failure label.
-guard_cg(#k_protected{arg=Ts,ret=Rs,anno=#l{i=I,vdb=Pdb}}, Fail, _Vdb, Bef, St) ->
- protected_cg(Ts, Rs, Fail, I, Pdb, Bef, St);
+guard_cg(#k_protected{arg=Ts,ret=Rs,anno=#l{vdb=Pdb}}, Fail, _Vdb, Bef, St) ->
+ protected_cg(Ts, Rs, Fail, Pdb, Bef, St);
guard_cg(#k_test{anno=#l{i=I},op=Test0,args=As,inverted=Inverted},
Fail, Vdb, Bef, St0) ->
#k_remote{mod=#k_atom{val=erlang},name=#k_atom{val=Test}} = Test0,
@@ -1500,13 +1529,13 @@ guard_cg(G, _Fail, Vdb, Bef, St) ->
%% guard_cg_list([Kexpr], Fail, I, Vdb, StackReg, St) ->
%% {[Ainstr],StackReg,St}.
-guard_cg_list(Kes, Fail, I, Vdb, Bef, St0) ->
+guard_cg_list(Kes, Fail, Vdb, Bef, St0) ->
{Keis,{Aft,St1}} =
flatmapfoldl(fun (Ke, {Inta,Sta}) ->
{Keis,Intb,Stb} =
guard_cg(Ke, Fail, Vdb, Inta, Sta),
{Keis,{Intb,Stb}}
- end, {Bef,St0}, need_heap(Kes, I)),
+ end, {Bef,St0}, need_heap(Kes)),
{Keis,Aft,St1}.
%% protected_cg([Kexpr], [Ret], Fail, I, Vdb, Bef, St) -> {[Ainstr],Aft,St}.
@@ -1516,15 +1545,14 @@ guard_cg_list(Kes, Fail, I, Vdb, Bef, St0) ->
%% return values then these must be set to 'false' on failure,
%% control always passes to the next instruction.
-protected_cg(Ts, [], Fail, I, Vdb, Bef, St0) ->
+protected_cg(Ts, [], Fail, Vdb, Bef, St0) ->
%% Protect these calls, revert when done.
- {Tis,Aft,St1} = guard_cg_list(Ts, Fail, I, Vdb, Bef,
- St0#cg{bfail=Fail}),
+ {Tis,Aft,St1} = guard_cg_list(Ts, Fail, Vdb, Bef, St0#cg{bfail=Fail}),
{Tis,Aft,St1#cg{bfail=St0#cg.bfail}};
-protected_cg(Ts, Rs, _Fail, I, Vdb, Bef, St0) ->
+protected_cg(Ts, Rs, _Fail, Vdb, Bef, St0) ->
{Pfail,St1} = new_label(St0),
{Psucc,St2} = new_label(St1),
- {Tis,Aft,St3} = guard_cg_list(Ts, Pfail, I, Vdb, Bef,
+ {Tis,Aft,St3} = guard_cg_list(Ts, Pfail, Vdb, Bef,
St2#cg{bfail=Pfail}),
%%ok = io:fwrite("cg ~w: ~p~n", [?LINE,{Rs,I,Vdb,Aft}]),
%% Set return values to false.
@@ -1896,17 +1924,17 @@ cg_recv_wait(#k_atom{val=infinity}, #cg_block{anno=Le,es=Tes}, I, Bef, St0) ->
%% But to keep the stack and register information up to date,
%% we will generate the code for the 'after' body, and then discard it.
Int1 = clear_dead(Bef, I, Le#l.vdb),
- {_,Int2,St1} = cg_block(Tes, Le#l.i, Le#l.vdb,
+ {_,Int2,St1} = cg_block(Tes, Le#l.vdb,
Int1#sr{reg=clear_regs(Int1#sr.reg)}, St0),
{[{wait,{f,St1#cg.recv}}],Int2,St1};
cg_recv_wait(#k_int{val=0}, #cg_block{anno=Le,es=Tes}, _I, Bef, St0) ->
- {Tis,Int,St1} = cg_block(Tes, Le#l.i, Le#l.vdb, Bef, St0),
+ {Tis,Int,St1} = cg_block(Tes, Le#l.vdb, Bef, St0),
{[timeout|Tis],Int,St1};
cg_recv_wait(Te, #cg_block{anno=Le,es=Tes}, I, Bef, St0) ->
Reg = cg_reg_arg(Te, Bef),
%% Must have empty registers here! Bug if anything in registers.
Int0 = clear_dead(Bef, I, Le#l.vdb),
- {Tis,Int,St1} = cg_block(Tes, Le#l.i, Le#l.vdb,
+ {Tis,Int,St1} = cg_block(Tes, Le#l.vdb,
Int0#sr{reg=clear_regs(Int0#sr.reg)}, St0),
{[{wait_timeout,{f,St1#cg.recv},Reg},timeout] ++ Tis,Int,St1}.
@@ -1967,7 +1995,7 @@ catch_cg(#cg_block{es=C}, #k_var{name=R}, Le, Vdb, Bef, St0) ->
CatchTag = Le#l.i,
Int1 = Bef#sr{stk=put_catch(CatchTag, Bef#sr.stk)},
CatchReg = fetch_stack({catch_tag,CatchTag}, Int1#sr.stk),
- {Cis,Int2,St2} = cg_block(C, Le#l.i, Le#l.vdb, Int1,
+ {Cis,Int2,St2} = cg_block(C, Le#l.vdb, Int1,
St1#cg{break=B,in_catch=true}),
[] = Int2#sr.reg, %Assertion.
Aft = Int2#sr{reg=[{0,R}],stk=drop_catch(CatchTag, Int2#sr.stk)},