aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Björn Gustavsson <[email protected]> 2015-07-07 10:45:38 +0200
committer: Björn Gustavsson <[email protected]> 2015-08-21 15:55:35 +0200
commit: c288ab87fd6cafe22ce46be551baa2e815b495b0 (patch)
tree: bda0b5f6646ae4b00ffca4df5ba9dc4a8e97f641
parent: 5f431276f1044c673c2e434e003e2f1ffddab341 (diff)
download: otp-c288ab87fd6cafe22ce46be551baa2e815b495b0.tar.gz
otp-c288ab87fd6cafe22ce46be551baa2e815b495b0.tar.bz2
otp-c288ab87fd6cafe22ce46be551baa2e815b495b0.zip
Delay get_tuple_element instructions until they are needed
When matching tuples, the pattern matching compiler would generate code that would fetch all elements of the tuple that will ultimately be used, *before* testing that (for example) the first element is the correct record tag. For example:

    is_tuple Fail {x,0}
    test_arity Fail {x,0} 3
    get_tuple_element {x,0} 0 {x,1}
    get_tuple_element {x,0} 1 {x,2}
    get_tuple_element {x,0} 2 {x,3}
    is_eq_exact Fail {x,1} some_tag

If {x,2} and {x,3} are not used at label Fail, we can re-arrange the code like this:

    is_tuple Fail {x,0}
    test_arity Fail {x,0} 3
    get_tuple_element {x,0} 0 {x,1}
    is_eq_exact Fail {x,1} some_tag
    get_tuple_element {x,0} 1 {x,2}
    get_tuple_element {x,0} 2 {x,3}

Doing that may be beneficial in two ways. If the branch is taken, we have eliminated the execution of two unnecessary instructions. Even if the branch is never or rarely taken, there is the possibility for more optimizations following the is_eq_exact instructions. For example, imagine that the code looks like this:

    get_tuple_element {x,0} 1 {x,2}
    get_tuple_element {x,0} 2 {x,3}
    move {x,2} {y,0}
    move {x,3} {y,1}

Assuming that {x,2} and {x,3} have no further uses in the code that follows, that can be rewritten to:

    get_tuple_element {x,0} 1 {y,0}
    get_tuple_element {x,0} 2 {y,1}

When should we perform this optimization? At the very latest, it must be done before opt_blocks/1 in beam_block which does the elimination of unnecessary moves. Actually, we want to do the optimization before the blocks have been established, since moving instructions out of one block into another is cumbersome. Therefore, we will do the optimization in a new pass that is run before beam_block. A new pass will make debugging easier, and beam_block already has a fair number of sub passes.
-rw-r--r-- lib/compiler/src/Makefile 1
-rw-r--r-- lib/compiler/src/beam_reorder.erl 113
-rw-r--r-- lib/compiler/src/beam_utils.erl 9
-rw-r--r-- lib/compiler/src/compile.erl 4
-rw-r--r-- lib/compiler/src/compiler.app.src 1
-rw-r--r-- lib/compiler/test/misc_SUITE.erl 8
6 files changed, 135 insertions, 1 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index 299b2892fc..ae4007c61c 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -62,6 +62,7 @@ MODULES = \
beam_opcodes \
beam_peep \
beam_receive \
+ beam_reorder \
beam_split \
beam_trim \
beam_type \
diff --git a/lib/compiler/src/beam_reorder.erl b/lib/compiler/src/beam_reorder.erl
new file mode 100644
index 0000000000..3230e33dbd
--- /dev/null
+++ b/lib/compiler/src/beam_reorder.erl
@@ -0,0 +1,113 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1999-2013. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(beam_reorder).
+
+-export([module/2]).
+-import(lists, [member/2,reverse/1]).
+
+module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
+ Fs = [function(F) || F <- Fs0],
+ {ok,{Mod,Exp,Attr,Fs,Lc}}.
+
+function({function,Name,Arity,CLabel,Is0}) ->
+ try
+ Is = reorder(Is0),
+ {function,Name,Arity,CLabel,Is}
+ catch
+ Class:Error ->
+ Stack = erlang:get_stacktrace(),
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
+
+%% reorder(Instructions0) -> Instructions
+%% Reorder instructions before the beam_block pass, because reordering
+%% will be more cumbersome when the blocks are in place.
+%%
+%% Execution of get_tuple_element instructions can be delayed until
+%% they are actually needed. Consider the sequence:
+%%
+%% get_tuple_element Tuple Pos Dst
+%% test Test Fail Operands
+%%
+%% If Dst is killed at label Fail (and not referenced in Operands),
+%% we can swap the instructions:
+%%
+%% test Test Fail Operands
+%% get_tuple_element Tuple Pos Dst
+%%
+%% That can be beneficial in two ways: Firstly, if the branch is taken
+%% we have avoided execution of the get_tuple_element instruction.
+%% Secondly, even if the branch is not taken, subsequent optimization
+%% (opt_blocks/1) may be able to change Dst to the final destination
+%% register and eliminate a 'move' instruction.
+
+reorder(Is) ->
+ D = beam_utils:index_labels(Is),
+ reorder_1(Is, D, []).
+
+reorder_1([{label,L}=I|_], D, Acc) ->
+ Is = beam_utils:code_at(L, D),
+ reorder_1(Is, D, [I|Acc]);
+reorder_1([{test,is_nonempty_list,_,_}=I|Is], D, Acc) ->
+ %% The run-time system may combine the is_nonempty_list test with
+ %% the following get_list instruction.
+ reorder_1(Is, D, [I|Acc]);
+reorder_1([{test,_,_,_}=I,
+ {select,_,_,_,_}=S|Is], D, Acc) ->
+ %% There is nothing to gain by inserting a get_tuple_element
+ %% instruction between the test instruction and the select
+ %% instruction.
+ reorder_1(Is, D, [S,I|Acc]);
+reorder_1([{test,_,{f,L},Ss}=I|Is0], D0,
+ [{get_tuple_element,_,_,El}=G|Acc0]=Acc) ->
+ case member(El, Ss) of
+ true ->
+ reorder_1(Is0, D0, [I|Acc]);
+ false ->
+ case beam_utils:is_killed_at(El, L, D0) of
+ true ->
+ Is = [I,G|Is0],
+ reorder_1(Is, D0, Acc0);
+ false ->
+ case beam_utils:is_killed(El, Is0, D0) of
+ true ->
+ Code0 = beam_utils:code_at(L, D0),
+ Code = [G|Code0],
+ D = beam_utils:index_label(L, Code, D0),
+ Is = [I|Is0],
+ reorder_1(Is, D, Acc0);
+ false ->
+ reorder_1(Is0, D0, [I|Acc])
+ end
+ end
+ end;
+reorder_1([{allocate_zero,N,Live}|Is], D,
+ [{get_tuple_element,_,_,{x,X}}=G|Acc])
+ when X+1 =:= Live ->
+ %% Move allocation instruction upwards past get_tuple_element
+ %% instructions to give more opportunities for moving
+ %% get_tuple_element instructions.
+ I = {allocate_zero,N,X},
+ reorder_1([I,G|Is], D, Acc);
+reorder_1([I|Is], D, Acc) ->
+ reorder_1(Is, D, [I|Acc]);
+reorder_1([], _, Acc) -> reverse(Acc).
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index fbcd5de1bb..68d6105cfa 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -484,6 +484,15 @@ check_liveness(R, [{get_map_elements,{f,Fail},S,{list,L}}|Is], St0) ->
Other
end
end;
+check_liveness(R, [{test_heap,N,Live}|Is], St) ->
+ I = {block,[{set,[],[],{alloc,Live,{nozero,nostack,N,[]}}}]},
+ check_liveness(R, [I|Is], St);
+check_liveness(R, [{allocate_zero,N,Live}|Is], St) ->
+ I = {block,[{set,[],[],{alloc,Live,{zero,N,0,[]}}}]},
+ check_liveness(R, [I|Is], St);
+check_liveness(R, [{get_list,S,D1,D2}|Is], St) ->
+ I = {block,[{set,[D1,D2],[S],get_list}]},
+ check_liveness(R, [I|Is], St);
check_liveness(_R, Is, St) when is_list(Is) ->
%% case Is of
%% [I|_] ->
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index cf79fdc9f9..605f5b8fd5 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -673,7 +673,9 @@ asm_passes() ->
[{pass,beam_a},
{iff,da,{listing,"a"}},
{unless,no_postopt,
- [{pass,beam_block},
+ [{unless,no_reorder,{pass,beam_reorder}},
+ {iff,dre,{listing,"reorder"}},
+ {pass,beam_block},
{iff,dblk,{listing,"block"}},
{unless,no_except,{pass,beam_except}},
{iff,dexcept,{listing,"except"}},
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index afb85f4710..62ea9cee80 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -37,6 +37,7 @@
beam_opcodes,
beam_peep,
beam_receive,
+ beam_reorder,
beam_split,
beam_trim,
beam_type,
diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl
index 8606935504..3582e055c8 100644
--- a/lib/compiler/test/misc_SUITE.erl
+++ b/lib/compiler/test/misc_SUITE.erl
@@ -192,6 +192,14 @@ silly_coverage(Config) when is_list(Config) ->
{label,2}|non_proper_list]}],99},
expect_error(fun() -> beam_a:module(BeamAInput, []) end),
+ %% beam_reorder
+ BlockInput = {?MODULE,[{foo,0}],[],
+ [{function,foo,0,2,
+ [{label,1},
+ {func_info,{atom,?MODULE},{atom,foo},0},
+ {label,2}|non_proper_list]}],99},
+ expect_error(fun() -> beam_reorder:module(BlockInput, []) end),
+
%% beam_block
BlockInput = {?MODULE,[{foo,0}],[],
[{function,foo,0,2,