From 25fa1aadc987f4c3abb8b39ae5fa090a9a103daa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 22 Jan 2018 15:46:05 +0100 Subject: beam_validator: Validate building of tuples Make sure that there is the correct number of put/1 instructions following put_tuple/2. Also make it illegal to reference the register for the tuple being built in a put/1 instruction. That is, beam_validator will now issue a diagnostice for the the following code: {put_tuple,1,{x,0}}. {put,{x,0}}. --- lib/compiler/src/beam_validator.erl | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'lib/compiler/src') diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index 4feb26c513..ca757f5e47 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -85,8 +85,6 @@ format_error(Error) -> %%% Things currently not checked. XXX %%% %%% - Heap allocation for binaries. -%%% - That put_tuple is followed by the correct number of -%%% put instructions. %%% %% validate(Module, [Function]) -> [] | [Error] @@ -148,7 +146,8 @@ validate_0(Module, [{function,Name,Ar,Entry,Code}|Fs], Ft) -> hf=0, %Available heap size for floats. fls=undefined, %Floating point state. ct=[], %List of hot catch/try labels - setelem=false %Previous instruction was setelement/3. + setelem=false, %Previous instruction was setelement/3. + puts_left=none %put/1 instructions left. }). -type label() :: integer(). @@ -340,11 +339,25 @@ valfun_1({put_list,A,B,Dst}, Vst0) -> Vst = eat_heap(2, Vst0), set_type_reg(cons, Dst, Vst); valfun_1({put_tuple,Sz,Dst}, Vst0) when is_integer(Sz) -> + Vst1 = eat_heap(1, Vst0), + Vst = set_type_reg(tuple_in_progress, Dst, Vst1), + #vst{current=St0} = Vst, + St = St0#st{puts_left={Sz,{Dst,{tuple,Sz}}}}, + Vst#vst{current=St}; +valfun_1({put,Src}, Vst0) -> + assert_term(Src, Vst0), Vst = eat_heap(1, Vst0), - set_type_reg({tuple,Sz}, Dst, Vst); -valfun_1({put,Src}, Vst) -> - assert_term(Src, Vst), - eat_heap(1, Vst); + #vst{current=St0} = Vst, + case St0 of + #st{puts_left=none} -> + error(not_building_a_tuple); + #st{puts_left={1,{Dst,Type}}} -> + St = St0#st{puts_left=none}, + set_type_reg(Type, Dst, Vst#vst{current=St}); + #st{puts_left={PutsLeft,Info}} when is_integer(PutsLeft) -> + St = St0#st{puts_left={PutsLeft-1,Info}}, + Vst#vst{current=St} + end; %% Instructions for optimization of selective receives. valfun_1({recv_mark,{f,Fail}}, Vst) when is_integer(Fail) -> Vst; @@ -1272,6 +1285,7 @@ get_move_term_type(Src, Vst) -> initialized -> error({unassigned,Src}); {catchtag,_} -> error({catchtag,Src}); {trytag,_} -> error({trytag,Src}); + tuple_in_progress -> error({tuple_in_progress,Src}); Type -> Type end. @@ -1280,10 +1294,7 @@ get_move_term_type(Src, Vst) -> %% a standard Erlang type (no catch/try tags or match contexts). get_term_type(Src, Vst) -> - case get_term_type_1(Src, Vst) of - initialized -> error({unassigned,Src}); - {catchtag,_} -> error({catchtag,Src}); - {trytag,_} -> error({trytag,Src}); + case get_move_term_type(Src, Vst) of #ms{} -> error({match_context,Src}); Type -> Type end. -- cgit v1.2.3 From ab6ac37e0c40331a2debd90f753ec2f4531e4701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 22 Jan 2018 16:27:33 +0100 Subject: Correct unsafe optimizations in beam_block When attempting to eliminate the move/2 instruction in the following code: {bif,self,{f,0},[],{x,0}}. {move,{x,0},{x,1}}. . . . {put_tuple,2,{x,1}}. {put,{atom,ok}}. {put,{x,0}}. beam_block would produce the following unsafe code: {bif,self,{f,0},[],{x,1}}. . . . {put_tuple,2,{x,1}}. {put,{atom,ok}}. {put,{x,1}}. It is unsafe because the tuple is self-referential. The following code: {put_list,{y,6},nil,{x,4}}. {move,{x,4},{x,5}}. {put_list,{y,1},{x,5},{x,5}}. . . . {put_tuple,2,{x,6}}. {put,{x,4}}. {put,{x,5}}. would be incorrectly transformed to: {put_list,{y,6},nil,{x,5}}. {put_list,{y,1},{x,5},{x,5}}. . . . {put_tuple,2,{x,6}}. {put,{x,5}}. {put,{x,5}}. (Both elements in the built tuple get the same value.) --- lib/compiler/src/beam_block.erl | 53 +++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 15 deletions(-) (limited to 'lib/compiler/src') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 39ae8d5347..7f6d3a75ae 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -289,7 +289,7 @@ opt_move(Dest, Is) -> opt_move_1(R, [{set,[D],[R],move}|Is0], Acc) -> %% Provided that the source register is killed by instructions %% that follow, the optimization is safe. - case eliminate_use_of_from_reg(Is0, R, D, []) of + case eliminate_use_of_from_reg(Is0, R, D) of {yes,Is} -> opt_move_rev(D, Acc, Is); no -> not_possible end; @@ -347,7 +347,7 @@ opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) -> opt_tuple_element_1([{set,_,_,{try_catch,_,_}}|_], _, _, _) -> no; opt_tuple_element_1([{set,[D],[S],move}|Is0], I0, {_,S}, Acc) -> - case eliminate_use_of_from_reg(Is0, S, D, []) of + case eliminate_use_of_from_reg(Is0, S, D) of no -> no; {yes,Is} -> @@ -389,6 +389,14 @@ is_killed_or_used(R, {set,Ss,Ds,_}) -> %% that FromRegister is still used and that the optimization is not %% possible. +eliminate_use_of_from_reg(Is, From, To) -> + try + eliminate_use_of_from_reg(Is, From, To, []) + catch + throw:not_possible -> + no + end. + eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) -> if X < Live -> @@ -397,21 +405,32 @@ eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) -> {yes,reverse(Acc, Is0)} end; eliminate_use_of_from_reg([{set,Ds,Ss0,Op}=I0|Is], From, To, Acc) -> + ensure_safe_tuple(I0, To), I = case member(From, Ss0) of - true -> - Ss = [case S of - From -> To; - _ -> S - end || S <- Ss0], - {set,Ds,Ss,Op}; - false -> - I0 - end, + true -> + Ss = [case S of + From -> To; + _ -> S + end || S <- Ss0], + {set,Ds,Ss,Op}; + false -> + I0 + end, case member(From, Ds) of - true -> - {yes,reverse(Acc, [I|Is])}; - false -> - eliminate_use_of_from_reg(Is, From, To, [I|Acc]) + true -> + {yes,reverse(Acc, [I|Is])}; + false -> + case member(To, Ds) of + true -> + case beam_utils:is_killed_block(From, Is) of + true -> + {yes,reverse(Acc, [I|Is])}; + false -> + no + end; + false -> + eliminate_use_of_from_reg(Is, From, To, [I|Acc]) + end end; eliminate_use_of_from_reg([I]=Is, From, _To, Acc) -> case beam_utils:is_killed_block(From, [I]) of @@ -421,6 +440,10 @@ eliminate_use_of_from_reg([I]=Is, From, _To, Acc) -> no end. +ensure_safe_tuple({set,[To],[],{put_tuple,_}}, To) -> + throw(not_possible); +ensure_safe_tuple(_, _) -> ok. + %% opt_allocs(Instructions) -> Instructions. Optimize allocate %% instructions inside blocks. If safe, replace an allocate_zero %% instruction with the slightly cheaper allocate instruction. -- cgit v1.2.3 From d5fbb64374247af0a90974a9c37a3eba93774f28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 22 Jan 2018 11:47:03 +0100 Subject: Correct bug in beam_block:opt/2 The folling sequence in a block: {move,{x,1},{x,2}}. {move,{x,2},{x,2}}. would be incorrectly rewritten to: {move,{x,2},{x,2}}. (Which in turn would be optimized away a little bit later.) --- lib/compiler/src/beam_block.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/compiler/src') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 7f6d3a75ae..18f325f172 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -231,7 +231,7 @@ alloc_may_pass({set,_,_,_}) -> true. %% Optimize the instruction stream inside a basic block. opt([{set,[X],[X],move}|Is]) -> opt(Is); -opt([{set,[X],_,move},{set,[X],_,move}=I|Is]) -> +opt([{set,[Dst],_,move},{set,[Dst],[Src],move}=I|Is]) when Dst =/= Src -> opt([I|Is]); opt([{set,[{x,0}],[S1],move}=I1,{set,[D2],[{x,0}],move}|Is]) -> opt([I1,{set,[D2],[S1],move}|Is]); -- cgit v1.2.3 From e7b49a3e657e1bd7bbf92050e6360b1e0f142a1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 22 Jan 2018 08:04:44 +0100 Subject: Apply common subexpression elimination in blocks Eliminate repeated evaluation of guard BIFs and building of cons cells in blocks. This optimization is applicable in more places than might be expected, because code generation for binaries and record can generate common sub expressions not visible in the original source code. For example, consider this function: make_binary(Term) -> Bin = term_to_binary(Term), Size = byte_size(Bin), <>. The compiler inserts a call to byte_size/2 to calculate the size of the binary being built: {function, make_binary, 1, 2}. {label,1}. {line,...}. {func_info,{atom,t},{atom,make_binary},1}. {label,2}. {allocate,0,1}. {line,...}. {call_ext,1,{extfunc,erlang,term_to_binary,1}}. {line,...}. {gc_bif,byte_size,{f,0},1,[{x,0}],{x,1}}. %Present in original code. {line,...}. {gc_bif,byte_size,{f,0},2,[{x,0}],{x,2}}. %Inserted by compiler. {bs_add,{f,0},[{x,2},{integer,4},1],{x,2}}. {bs_init2,{f,0},{x,2},0,2,{field_flags,[]},{x,2}}. {bs_put_integer,{f,0},{integer,32},1,{field_flags,[unsigned,big]},{x,1}}. {bs_put_binary,{f,0},{atom,all},8,{field_flags,[unsigned,big]},{x,0}}. {move,{x,2},{x,0}}. {deallocate,0}. return. Common sub expression elimination (CSE) eliminates the second call to byte_size/2: {function, make_binary, 1, 2}. {label,1}. {line,...}. {func_info,{atom,t},{atom,make_binary},1}. {label,2}. {allocate,0,1}. {line,...}. {call_ext,1,{extfunc,erlang,term_to_binary,1}}. {line,...}. {gc_bif,byte_size,{f,0},1,[{x,0}],{x,1}}. {move,{x,1},{x,2}}. {bs_add,{f,0},[{x,2},{integer,4},1],{x,2}}. {bs_init2,{f,0},{x,2},0,2,{field_flags,[]},{x,2}}. {bs_put_integer,{f,0},{integer,32},1,{field_flags,[unsigned,big]},{x,1}}. {bs_put_binary,{f,0},{atom,all},8,{field_flags,[unsigned,big]},{x,0}}. {move,{x,2},{x,0}}. {deallocate,0}. return. Note: A possible future optimization would be to include binary construction instructions in blocks. If that is done, the {move,{x,1},{x,2}} instruction could also be eliminated. --- lib/compiler/src/beam_block.erl | 113 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 107 insertions(+), 6 deletions(-) (limited to 'lib/compiler/src') diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 18f325f172..d0536e0669 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -43,12 +43,13 @@ function({function,Name,Arity,CLabel,Is0}, Blockify) -> false -> Is0 end, - Is3 = beam_utils:anno_defs(Is2), - Is4 = move_allocates(Is3), - Is5 = beam_utils:live_opt(Is4), - Is6 = opt_blocks(Is5), - Is7 = beam_utils:delete_annos(Is6), - Is = opt_allocs(Is7), + Is3 = local_cse(Is2), + Is4 = beam_utils:anno_defs(Is3), + Is5 = move_allocates(Is4), + Is6 = beam_utils:live_opt(Is5), + Is7 = opt_blocks(Is6), + Is8 = beam_utils:delete_annos(Is7), + Is = opt_allocs(Is8), %% Done. {function,Name,Arity,CLabel,Is} @@ -564,3 +565,103 @@ defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) -> x_live(Ds, Regs bor ((1 bsl Live) - 1)); defined_regs([{set,Ds,_,_}|Is], Regs) -> defined_regs(Is, x_live(Ds, Regs)). + +%%% +%%% Do local common sub expression elimination (CSE) in each block. +%%% + +local_cse([{block,Bl0}|Is]) -> + Bl = cse_block(Bl0, orddict:new(), []), + [{block,Bl}|local_cse(Is)]; +local_cse([I|Is]) -> + [I|local_cse(Is)]; +local_cse([]) -> []. + +cse_block([I|Is], Es0, Acc0) -> + Es1 = cse_clear(I, Es0), + case cse_expr(I) of + none -> + %% Instruction is not suitable for CSE. + cse_block(Is, Es1, [I|Acc0]); + {ok,D,Expr} -> + %% Suitable instruction. First update the dictionary of + %% suitable expressions for the next iteration. + Es = cse_add(D, Expr, Es1), + + %% Search for a previous identical expression. + case cse_find(Expr, Es0) of + error -> + %% Nothing found + cse_block(Is, Es, [I|Acc0]); + Src -> + %% Use the previously calculated result. + %% Also eliminate any line instruction. + Move = {set,[D],[Src],move}, + case Acc0 of + [{set,_,_,{line,_}}|Acc] -> + cse_block(Is, Es, [Move|Acc]); + [_|_] -> + cse_block(Is, Es, [Move|Acc0]) + end + end + end; +cse_block([], _, Acc) -> + reverse(Acc). + +%% cse_find(Expr, Expressions) -> error | Register. +%% Find a previously evaluated expression whose result can be reused, +%% or return 'error' if no such expression is found. + +cse_find(Expr, Es) -> + case orddict:find(Expr, Es) of + {ok,{Src,_}} -> Src; + error -> error + end. + +cse_expr({set,[D],Ss,{bif,N,_}}) -> + {ok,D,{{bif,N},Ss}}; +cse_expr({set,[D],Ss,{alloc,_,{gc_bif,N,_}}}) -> + {ok,D,{{gc_bif,N},Ss}}; +cse_expr({set,[D],Ss,put_list}) -> + {ok,D,{put_list,Ss}}; +cse_expr(_) -> none. + +%% cse_clear(Instr, Expressions0) -> Expressions. +%% Remove all previous expressions that will become +%% invalid when this instruction is executed. Basically, +%% an expression is no longer safe to reuse when the +%% register it has been stored to has been modified, killed, +%% or if any of the source operands have changed. + +cse_clear({set,Ds,_,{alloc,Live,_}}, Es) -> + cse_clear_1(Es, Live, Ds); +cse_clear({set,Ds,_,_}, Es) -> + cse_clear_1(Es, all, Ds). + +cse_clear_1(Es, Live, Ds0) -> + Ds = ordsets:from_list(Ds0), + [E || E <- Es, cse_is_safe(E, Live, Ds)]. + +cse_is_safe({_,{Dst,Interfering}}, Live, Ds) -> + ordsets:is_disjoint(Interfering, Ds) andalso + case Dst of + {x,X} -> + X < Live; + _ -> + true + end. + +%% cse_add(Dest, Expr, Expressions0) -> Expressions. +%% Provided that it is safe, add a new expression to the dictionary +%% of already evaluated expressions. + +cse_add(D, {_,Ss}=Expr, Es) -> + case member(D, Ss) of + false -> + Interfering = ordsets:from_list([D|Ss]), + orddict:store(Expr, {D,Interfering}, Es); + true -> + %% Unsafe because the instruction overwrites one of + %% source operands. + Es + end. -- cgit v1.2.3