aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2018-01-22 08:04:44 +0100
committerBjörn Gustavsson <[email protected]>2018-01-24 12:34:24 +0100
commite7b49a3e657e1bd7bbf92050e6360b1e0f142a1e (patch)
tree5f39dca92303acc7547c87796719f3862a5e62fd
parentd5fbb64374247af0a90974a9c37a3eba93774f28 (diff)
downloadotp-e7b49a3e657e1bd7bbf92050e6360b1e0f142a1e.tar.gz
otp-e7b49a3e657e1bd7bbf92050e6360b1e0f142a1e.tar.bz2
otp-e7b49a3e657e1bd7bbf92050e6360b1e0f142a1e.zip
Apply common subexpression elimination in blocks
Eliminate repeated evaluation of guard BIFs and building of cons cells in blocks. This optimization is applicable in more places than might be expected, because code generation for binaries and record can generate common sub expressions not visible in the original source code. For example, consider this function: make_binary(Term) -> Bin = term_to_binary(Term), Size = byte_size(Bin), <<Size:32,Bin/binary>>. The compiler inserts a call to byte_size/2 to calculate the size of the binary being built: {function, make_binary, 1, 2}. {label,1}. {line,...}. {func_info,{atom,t},{atom,make_binary},1}. {label,2}. {allocate,0,1}. {line,...}. {call_ext,1,{extfunc,erlang,term_to_binary,1}}. {line,...}. {gc_bif,byte_size,{f,0},1,[{x,0}],{x,1}}. %Present in original code. {line,...}. {gc_bif,byte_size,{f,0},2,[{x,0}],{x,2}}. %Inserted by compiler. {bs_add,{f,0},[{x,2},{integer,4},1],{x,2}}. {bs_init2,{f,0},{x,2},0,2,{field_flags,[]},{x,2}}. {bs_put_integer,{f,0},{integer,32},1,{field_flags,[unsigned,big]},{x,1}}. {bs_put_binary,{f,0},{atom,all},8,{field_flags,[unsigned,big]},{x,0}}. {move,{x,2},{x,0}}. {deallocate,0}. return. Common sub expression elimination (CSE) eliminates the second call to byte_size/2: {function, make_binary, 1, 2}. {label,1}. {line,...}. {func_info,{atom,t},{atom,make_binary},1}. {label,2}. {allocate,0,1}. {line,...}. {call_ext,1,{extfunc,erlang,term_to_binary,1}}. {line,...}. {gc_bif,byte_size,{f,0},1,[{x,0}],{x,1}}. {move,{x,1},{x,2}}. {bs_add,{f,0},[{x,2},{integer,4},1],{x,2}}. {bs_init2,{f,0},{x,2},0,2,{field_flags,[]},{x,2}}. {bs_put_integer,{f,0},{integer,32},1,{field_flags,[unsigned,big]},{x,1}}. {bs_put_binary,{f,0},{atom,all},8,{field_flags,[unsigned,big]},{x,0}}. {move,{x,2},{x,0}}. {deallocate,0}. return. Note: A possible future optimization would be to include binary construction instructions in blocks. If that is done, the {move,{x,1},{x,2}} instruction could also be eliminated.
-rw-r--r--lib/compiler/src/beam_block.erl113
-rw-r--r--lib/compiler/test/beam_block_SUITE.erl62
2 files changed, 167 insertions, 8 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 18f325f172..d0536e0669 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -43,12 +43,13 @@ function({function,Name,Arity,CLabel,Is0}, Blockify) ->
false ->
Is0
end,
- Is3 = beam_utils:anno_defs(Is2),
- Is4 = move_allocates(Is3),
- Is5 = beam_utils:live_opt(Is4),
- Is6 = opt_blocks(Is5),
- Is7 = beam_utils:delete_annos(Is6),
- Is = opt_allocs(Is7),
+ Is3 = local_cse(Is2),
+ Is4 = beam_utils:anno_defs(Is3),
+ Is5 = move_allocates(Is4),
+ Is6 = beam_utils:live_opt(Is5),
+ Is7 = opt_blocks(Is6),
+ Is8 = beam_utils:delete_annos(Is7),
+ Is = opt_allocs(Is8),
%% Done.
{function,Name,Arity,CLabel,Is}
@@ -564,3 +565,103 @@ defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) ->
x_live(Ds, Regs bor ((1 bsl Live) - 1));
defined_regs([{set,Ds,_,_}|Is], Regs) ->
defined_regs(Is, x_live(Ds, Regs)).
+
+%%%
+%%% Do local common sub expression elimination (CSE) in each block.
+%%%
+
+local_cse([{block,Bl0}|Is]) ->
+ Bl = cse_block(Bl0, orddict:new(), []),
+ [{block,Bl}|local_cse(Is)];
+local_cse([I|Is]) ->
+ [I|local_cse(Is)];
+local_cse([]) -> [].
+
+cse_block([I|Is], Es0, Acc0) ->
+ Es1 = cse_clear(I, Es0),
+ case cse_expr(I) of
+ none ->
+ %% Instruction is not suitable for CSE.
+ cse_block(Is, Es1, [I|Acc0]);
+ {ok,D,Expr} ->
+ %% Suitable instruction. First update the dictionary of
+ %% suitable expressions for the next iteration.
+ Es = cse_add(D, Expr, Es1),
+
+ %% Search for a previous identical expression.
+ case cse_find(Expr, Es0) of
+ error ->
+ %% Nothing found
+ cse_block(Is, Es, [I|Acc0]);
+ Src ->
+ %% Use the previously calculated result.
+ %% Also eliminate any line instruction.
+ Move = {set,[D],[Src],move},
+ case Acc0 of
+ [{set,_,_,{line,_}}|Acc] ->
+ cse_block(Is, Es, [Move|Acc]);
+ [_|_] ->
+ cse_block(Is, Es, [Move|Acc0])
+ end
+ end
+ end;
+cse_block([], _, Acc) ->
+ reverse(Acc).
+
+%% cse_find(Expr, Expressions) -> error | Register.
+%% Find a previously evaluated expression whose result can be reused,
+%% or return 'error' if no such expression is found.
+
+cse_find(Expr, Es) ->
+ case orddict:find(Expr, Es) of
+ {ok,{Src,_}} -> Src;
+ error -> error
+ end.
+
+cse_expr({set,[D],Ss,{bif,N,_}}) ->
+ {ok,D,{{bif,N},Ss}};
+cse_expr({set,[D],Ss,{alloc,_,{gc_bif,N,_}}}) ->
+ {ok,D,{{gc_bif,N},Ss}};
+cse_expr({set,[D],Ss,put_list}) ->
+ {ok,D,{put_list,Ss}};
+cse_expr(_) -> none.
+
+%% cse_clear(Instr, Expressions0) -> Expressions.
+%% Remove all previous expressions that will become
+%% invalid when this instruction is executed. Basically,
+%% an expression is no longer safe to reuse when the
+%% register it has been stored to has been modified, killed,
+%% or if any of the source operands have changed.
+
+cse_clear({set,Ds,_,{alloc,Live,_}}, Es) ->
+ cse_clear_1(Es, Live, Ds);
+cse_clear({set,Ds,_,_}, Es) ->
+ cse_clear_1(Es, all, Ds).
+
+cse_clear_1(Es, Live, Ds0) ->
+ Ds = ordsets:from_list(Ds0),
+ [E || E <- Es, cse_is_safe(E, Live, Ds)].
+
+cse_is_safe({_,{Dst,Interfering}}, Live, Ds) ->
+ ordsets:is_disjoint(Interfering, Ds) andalso
+ case Dst of
+ {x,X} ->
+ X < Live;
+ _ ->
+ true
+ end.
+
+%% cse_add(Dest, Expr, Expressions0) -> Expressions.
+%% Provided that it is safe, add a new expression to the dictionary
+%% of already evaluated expressions.
+
+cse_add(D, {_,Ss}=Expr, Es) ->
+ case member(D, Ss) of
+ false ->
+ Interfering = ordsets:from_list([D|Ss]),
+ orddict:store(Expr, {D,Interfering}, Es);
+ true ->
+ %% Unsafe because the instruction overwrites one of
+ %% source operands.
+ Es
+ end.
diff --git a/lib/compiler/test/beam_block_SUITE.erl b/lib/compiler/test/beam_block_SUITE.erl
index 55d5f2dbe8..fac18789e0 100644
--- a/lib/compiler/test/beam_block_SUITE.erl
+++ b/lib/compiler/test/beam_block_SUITE.erl
@@ -22,7 +22,7 @@
-export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1,
init_per_group/2,end_per_group/2,
get_map_elements/1,otp_7345/1,move_opt_across_gc_bif/1,
- erl_202/1,repro/1]).
+ erl_202/1,repro/1,local_cse/1]).
%% The only test for the following functions is that
%% the code compiles and is accepted by beam_validator.
@@ -40,7 +40,8 @@ groups() ->
otp_7345,
move_opt_across_gc_bif,
erl_202,
- repro
+ repro,
+ local_cse
]}].
init_per_suite(Config) ->
@@ -237,6 +238,63 @@ find_operands(Cfg,XsiGraph,ActiveList,Count) ->
[Count+1, length(NewActiveList), length(digraph:vertices(XsiGraph))],
find_operands(NewCfg,XsiGraph,NewActiveList,Count+1).
+%% Some tests of local common subexpression elimination (CSE).
+
+local_cse(_Config) ->
+ {Self,{ok,Self}} = local_cse_1(),
+
+ local_cse_2([]),
+ local_cse_2(lists:seq(1, 512)),
+ local_cse_2(?MODULE:module_info()),
+
+ {[b],[a,b]} = local_cse_3(a, b),
+
+ {2000,Self,{Self,write_cache}} = local_cse_4(),
+
+ ok.
+
+local_cse_1() ->
+ %% Cover handling of unsafe tuple construction in
+ %% eliminate_use_of_from_reg/4. It became necessary to handle
+ %% unsafe tuples when local CSE was introduced.
+
+ {self(),{ok,self()}}.
+
+local_cse_2(Term) ->
+ case cse_make_binary(Term) of
+ <<Size:8,BinTerm:Size/binary>> ->
+ Term = binary_to_term(BinTerm);
+ <<Size:8,SizeTerm:Size/binary,BinTerm/binary>> ->
+ {'$size',TermSize} = binary_to_term(SizeTerm),
+ TermSize = byte_size(BinTerm),
+ Term = binary_to_term(BinTerm)
+ end.
+
+%% Copy of observer_backend:ttb_make_binary/1. During development of
+%% the local CSE optimization this function was incorrectly optimized.
+
+cse_make_binary(Term) ->
+ B = term_to_binary(Term),
+ SizeB = byte_size(B),
+ if SizeB > 255 ->
+ SB = term_to_binary({'$size',SizeB}),
+ <<(byte_size(SB)):8, SB/binary, B/binary>>;
+ true ->
+ <<SizeB:8, B/binary>>
+ end.
+
+local_cse_3(X, Y) ->
+ %% The following expression was incorrectly transformed to {[X,Y],[X,Y]}
+ %% during development of the local CSE optimization.
+
+ {[Y],[X,Y]}.
+
+local_cse_4() ->
+ do_local_cse_4(2000, self(), {self(), write_cache}).
+
+do_local_cse_4(X, Y, Z) ->
+ {X,Y,Z}.
+
%%%
%%% Common functions.
%%%