aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2017-12-11 22:00:24 +0100
committerBjörn Gustavsson <[email protected]>2017-12-15 12:31:29 +0100
commitb89044a800c4950072b14d5e372cb2db55f1966c (patch)
tree07787239d192171ead8b1521e343c5cae0d5811e /lib
parentcd708cf3cd5ea4402ea8cec2d9570506b7bf8e92 (diff)
downloadotp-b89044a800c4950072b14d5e372cb2db55f1966c.tar.gz
otp-b89044a800c4950072b14d5e372cb2db55f1966c.tar.bz2
otp-b89044a800c4950072b14d5e372cb2db55f1966c.zip
v3_codegen: Delay creation of stack frames
v3_codegen currently wraps a stack frame around each clause in a function (unless the clause is simple without any 'case' or other complex constructions). Consider this function: f({a,X}) -> A = abs(X), case A of 0 -> {result,"0"}; _ -> {result,integer_to_list(A)} end; f(_) -> error. The first clause needs a stack frame because there is a function call to integer_to_list/1 not in the tail position. v3_codegen currently wraps the entire first clause in a stack frame. We can delay the creation of the stack frame, and create a stack frame in each arm of the 'case' (if needed): f({a,X}) -> A = abs(X), case A of 0 -> %% Don't create a stack frame here. {result,"0"}; _ -> %% Create a stack frame here. {result,integer_to_list(A)} end; f(_) -> error. There are pros and cons of this approach. The cons are that the code size may increase if there are many 'case' clauses and each needs its own stack frame. The allocation instructions may also interfere with other optimizations, but the new optimizations introduced in previous commits will mitigate most of those issues. The pros are the following: * For some clauses in a 'case', there is no need to create any stack frame at all. * Often when moving an allocation instruction into a 'case' clause, the slightly cheaper 'allocate' instruction can be used instead of 'allocate_zero'. There is also the possibility that the allocate instruction can be combined with a 'test_heap' instruction. * Each stack frame for each arm of the 'case' will have exactly as many slots as needed.
Diffstat (limited to 'lib')
-rw-r--r--lib/compiler/src/v3_codegen.erl41
1 files changed, 36 insertions, 5 deletions
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index 4c2a1c6dea..2138321a39 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -743,11 +743,42 @@ block_cg(Es, Le, _Vdb, Bef, St) ->
block_cg(Es, Le, Bef, #cg{is_top_block=false}=St) ->
cg_block(Es, Le#l.vdb, Bef, St);
-block_cg(Es, Le, Bef, St0) ->
- {Is0,Aft,St} = cg_block(Es, Le#l.vdb, Bef,
- St0#cg{is_top_block=false,need_frame=false}),
- Is = top_level_block(Is0, Aft, max_reg(Bef#sr.reg), St),
- {Is,Aft,St#cg{is_top_block=true}}.
+block_cg(Es, Le, Bef, #cg{is_top_block=true}=St0) ->
+ %% No stack frame has been established yet. Do we need one?
+ case need_stackframe(Es) of
+ true ->
+ %% We need a stack frame. Generate the code and add the
+ %% code for creating and deallocating the stack frame.
+ {Is0,Aft,St} = cg_block(Es, Le#l.vdb, Bef,
+ St0#cg{is_top_block=false,need_frame=false}), % code inside is generated with is_top_block=false; need_frame is reset
+ Is = top_level_block(Is0, Aft, max_reg(Bef#sr.reg), St),
+ {Is,Aft,St#cg{is_top_block=true}}; % hand the state back with is_top_block=true again
+ false ->
+ %% This sequence of instructions ending in a #k_match{} (a
+ %% 'case' or 'if') in the Erlang code does not need a
+ %% stack frame yet. Delay the creation (if a stack frame
+ %% is needed at all, it will be created inside the
+ %% #k_match{}).
+ cg_list(Es, Le#l.vdb, Bef, St0) % St0 is passed on unchanged, so is_top_block is still true
+ end.
+
+%% need_stackframe([Kexpr]) -> true|false.
+%% Does this list of instructions need a stack frame?
+%%
+%% A sequence of instructions that don't clobber the X registers
+%% followed by a single #k_match{} doesn't need a stack frame.
+
+need_stackframe([H|T]) ->
+ case H of
+ #k_bif{op=#k_internal{}} -> true; % internal BIF: frame required (presumably may clobber X registers - confirm)
+ #k_put{arg=#k_binary{}} -> true; % building a binary: frame required (reason not shown here - confirm)
+ #k_bif{} -> need_stackframe(T); % any other BIF: keep scanning the rest of the list
+ #k_put{} -> need_stackframe(T); % any other put: keep scanning
+ #k_guard_match{} -> need_stackframe(T); % guard match: keep scanning
+ #k_match{} when T =:= [] -> false; % a #k_match{} that is the last element: frame creation can be delayed
+ _ -> true % anything else, including a #k_match{} that is not last, needs a frame
+ end;
+need_stackframe([]) -> false. % list exhausted without hitting a frame-requiring element
cg_block([], _Vdb, Bef, St0) ->
{[],Bef,St0};