diff options
author | Björn Gustavsson <[email protected]> | 2018-01-22 08:04:44 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2018-01-24 12:34:24 +0100 |
commit | e7b49a3e657e1bd7bbf92050e6360b1e0f142a1e (patch) | |
tree | 5f39dca92303acc7547c87796719f3862a5e62fd | |
parent | d5fbb64374247af0a90974a9c37a3eba93774f28 (diff) | |
download | otp-e7b49a3e657e1bd7bbf92050e6360b1e0f142a1e.tar.gz otp-e7b49a3e657e1bd7bbf92050e6360b1e0f142a1e.tar.bz2 otp-e7b49a3e657e1bd7bbf92050e6360b1e0f142a1e.zip |
Apply common subexpression elimination in blocks
Eliminate repeated evaluation of guard BIFs and building of cons cells
in blocks. This optimization is applicable in more places than might be
expected, because code generation for binaries and record can generate
common sub expressions not visible in the original source code.
For example, consider this function:
make_binary(Term) ->
Bin = term_to_binary(Term),
Size = byte_size(Bin),
<<Size:32,Bin/binary>>.
The compiler inserts a call to byte_size/2 to calculate the size of
the binary being built:
{function, make_binary, 1, 2}.
{label,1}.
{line,...}.
{func_info,{atom,t},{atom,make_binary},1}.
{label,2}.
{allocate,0,1}.
{line,...}.
{call_ext,1,{extfunc,erlang,term_to_binary,1}}.
{line,...}.
{gc_bif,byte_size,{f,0},1,[{x,0}],{x,1}}. %Present in original code.
{line,...}.
{gc_bif,byte_size,{f,0},2,[{x,0}],{x,2}}. %Inserted by compiler.
{bs_add,{f,0},[{x,2},{integer,4},1],{x,2}}.
{bs_init2,{f,0},{x,2},0,2,{field_flags,[]},{x,2}}.
{bs_put_integer,{f,0},{integer,32},1,{field_flags,[unsigned,big]},{x,1}}.
{bs_put_binary,{f,0},{atom,all},8,{field_flags,[unsigned,big]},{x,0}}.
{move,{x,2},{x,0}}.
{deallocate,0}.
return.
Common sub expression elimination (CSE) eliminates the second call to
byte_size/2:
{function, make_binary, 1, 2}.
{label,1}.
{line,...}.
{func_info,{atom,t},{atom,make_binary},1}.
{label,2}.
{allocate,0,1}.
{line,...}.
{call_ext,1,{extfunc,erlang,term_to_binary,1}}.
{line,...}.
{gc_bif,byte_size,{f,0},1,[{x,0}],{x,1}}.
{move,{x,1},{x,2}}.
{bs_add,{f,0},[{x,2},{integer,4},1],{x,2}}.
{bs_init2,{f,0},{x,2},0,2,{field_flags,[]},{x,2}}.
{bs_put_integer,{f,0},{integer,32},1,{field_flags,[unsigned,big]},{x,1}}.
{bs_put_binary,{f,0},{atom,all},8,{field_flags,[unsigned,big]},{x,0}}.
{move,{x,2},{x,0}}.
{deallocate,0}.
return.
Note: A possible future optimization would be to include binary
construction instructions in blocks. If that is done, the
{move,{x,1},{x,2}} instruction could also be eliminated.
-rw-r--r-- | lib/compiler/src/beam_block.erl | 113 | ||||
-rw-r--r-- | lib/compiler/test/beam_block_SUITE.erl | 62 |
2 files changed, 167 insertions, 8 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 18f325f172..d0536e0669 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -43,12 +43,13 @@ function({function,Name,Arity,CLabel,Is0}, Blockify) -> false -> Is0 end, - Is3 = beam_utils:anno_defs(Is2), - Is4 = move_allocates(Is3), - Is5 = beam_utils:live_opt(Is4), - Is6 = opt_blocks(Is5), - Is7 = beam_utils:delete_annos(Is6), - Is = opt_allocs(Is7), + Is3 = local_cse(Is2), + Is4 = beam_utils:anno_defs(Is3), + Is5 = move_allocates(Is4), + Is6 = beam_utils:live_opt(Is5), + Is7 = opt_blocks(Is6), + Is8 = beam_utils:delete_annos(Is7), + Is = opt_allocs(Is8), %% Done. {function,Name,Arity,CLabel,Is} @@ -564,3 +565,103 @@ defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) -> x_live(Ds, Regs bor ((1 bsl Live) - 1)); defined_regs([{set,Ds,_,_}|Is], Regs) -> defined_regs(Is, x_live(Ds, Regs)). + +%%% +%%% Do local common sub expression elimination (CSE) in each block. +%%% + +local_cse([{block,Bl0}|Is]) -> + Bl = cse_block(Bl0, orddict:new(), []), + [{block,Bl}|local_cse(Is)]; +local_cse([I|Is]) -> + [I|local_cse(Is)]; +local_cse([]) -> []. + +cse_block([I|Is], Es0, Acc0) -> + Es1 = cse_clear(I, Es0), + case cse_expr(I) of + none -> + %% Instruction is not suitable for CSE. + cse_block(Is, Es1, [I|Acc0]); + {ok,D,Expr} -> + %% Suitable instruction. First update the dictionary of + %% suitable expressions for the next iteration. + Es = cse_add(D, Expr, Es1), + + %% Search for a previous identical expression. + case cse_find(Expr, Es0) of + error -> + %% Nothing found + cse_block(Is, Es, [I|Acc0]); + Src -> + %% Use the previously calculated result. + %% Also eliminate any line instruction. + Move = {set,[D],[Src],move}, + case Acc0 of + [{set,_,_,{line,_}}|Acc] -> + cse_block(Is, Es, [Move|Acc]); + [_|_] -> + cse_block(Is, Es, [Move|Acc0]) + end + end + end; +cse_block([], _, Acc) -> + reverse(Acc). + +%% cse_find(Expr, Expressions) -> error | Register. +%% Find a previously evaluated expression whose result can be reused, +%% or return 'error' if no such expression is found. + +cse_find(Expr, Es) -> + case orddict:find(Expr, Es) of + {ok,{Src,_}} -> Src; + error -> error + end. + +cse_expr({set,[D],Ss,{bif,N,_}}) -> + {ok,D,{{bif,N},Ss}}; +cse_expr({set,[D],Ss,{alloc,_,{gc_bif,N,_}}}) -> + {ok,D,{{gc_bif,N},Ss}}; +cse_expr({set,[D],Ss,put_list}) -> + {ok,D,{put_list,Ss}}; +cse_expr(_) -> none. + +%% cse_clear(Instr, Expressions0) -> Expressions. +%% Remove all previous expressions that will become +%% invalid when this instruction is executed. Basically, +%% an expression is no longer safe to reuse when the +%% register it has been stored to has been modified, killed, +%% or if any of the source operands have changed. + +cse_clear({set,Ds,_,{alloc,Live,_}}, Es) -> + cse_clear_1(Es, Live, Ds); +cse_clear({set,Ds,_,_}, Es) -> + cse_clear_1(Es, all, Ds). + +cse_clear_1(Es, Live, Ds0) -> + Ds = ordsets:from_list(Ds0), + [E || E <- Es, cse_is_safe(E, Live, Ds)]. + +cse_is_safe({_,{Dst,Interfering}}, Live, Ds) -> + ordsets:is_disjoint(Interfering, Ds) andalso + case Dst of + {x,X} -> + X < Live; + _ -> + true + end. + +%% cse_add(Dest, Expr, Expressions0) -> Expressions. +%% Provided that it is safe, add a new expression to the dictionary +%% of already evaluated expressions. + +cse_add(D, {_,Ss}=Expr, Es) -> + case member(D, Ss) of + false -> + Interfering = ordsets:from_list([D|Ss]), + orddict:store(Expr, {D,Interfering}, Es); + true -> + %% Unsafe because the instruction overwrites one of + %% source operands. + Es + end. diff --git a/lib/compiler/test/beam_block_SUITE.erl b/lib/compiler/test/beam_block_SUITE.erl index 55d5f2dbe8..fac18789e0 100644 --- a/lib/compiler/test/beam_block_SUITE.erl +++ b/lib/compiler/test/beam_block_SUITE.erl @@ -22,7 +22,7 @@ -export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1, init_per_group/2,end_per_group/2, get_map_elements/1,otp_7345/1,move_opt_across_gc_bif/1, - erl_202/1,repro/1]). + erl_202/1,repro/1,local_cse/1]). %% The only test for the following functions is that %% the code compiles and is accepted by beam_validator. @@ -40,7 +40,8 @@ groups() -> otp_7345, move_opt_across_gc_bif, erl_202, - repro + repro, + local_cse ]}]. init_per_suite(Config) -> @@ -237,6 +238,63 @@ find_operands(Cfg,XsiGraph,ActiveList,Count) -> [Count+1, length(NewActiveList), length(digraph:vertices(XsiGraph))], find_operands(NewCfg,XsiGraph,NewActiveList,Count+1). +%% Some tests of local common subexpression elimination (CSE). + +local_cse(_Config) -> + {Self,{ok,Self}} = local_cse_1(), + + local_cse_2([]), + local_cse_2(lists:seq(1, 512)), + local_cse_2(?MODULE:module_info()), + + {[b],[a,b]} = local_cse_3(a, b), + + {2000,Self,{Self,write_cache}} = local_cse_4(), + + ok. + +local_cse_1() -> + %% Cover handling of unsafe tuple construction in + %% eliminate_use_of_from_reg/4. It became necessary to handle + %% unsafe tuples when local CSE was introduced. + + {self(),{ok,self()}}. + +local_cse_2(Term) -> + case cse_make_binary(Term) of + <<Size:8,BinTerm:Size/binary>> -> + Term = binary_to_term(BinTerm); + <<Size:8,SizeTerm:Size/binary,BinTerm/binary>> -> + {'$size',TermSize} = binary_to_term(SizeTerm), + TermSize = byte_size(BinTerm), + Term = binary_to_term(BinTerm) + end. + +%% Copy of observer_backend:ttb_make_binary/1. During development of +%% the local CSE optimization this function was incorrectly optimized. + +cse_make_binary(Term) -> + B = term_to_binary(Term), + SizeB = byte_size(B), + if SizeB > 255 -> + SB = term_to_binary({'$size',SizeB}), + <<(byte_size(SB)):8, SB/binary, B/binary>>; + true -> + <<SizeB:8, B/binary>> + end. + +local_cse_3(X, Y) -> + %% The following expression was incorrectly transformed to {[X,Y],[X,Y]} + %% during development of the local CSE optimization. + + {[Y],[X,Y]}. + +local_cse_4() -> + do_local_cse_4(2000, self(), {self(), write_cache}). + +do_local_cse_4(X, Y, Z) -> + {X,Y,Z}. + %%% %%% Common functions. %%% |