aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler/src
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2015-04-14 13:01:00 +0200
committerBjörn Gustavsson <[email protected]>2015-04-22 10:14:40 +0200
commit0971feb23d553bb51b04eb78fbf76c4bc19b4b93 (patch)
tree5cb0afafd88e42276c1518d11264dd2cf1bdf873 /lib/compiler/src
parent32b85d920529990df2aa7acb091e7d03c520ef8c (diff)
downloadotp-0971feb23d553bb51b04eb78fbf76c4bc19b4b93.tar.gz
otp-0971feb23d553bb51b04eb78fbf76c4bc19b4b93.tar.bz2
otp-0971feb23d553bb51b04eb78fbf76c4bc19b4b93.zip
v3_codegen: Reduce cost for fixing up bs_match_string instructions
Commit b76588fb5a introduced an optimization of the compile time of huge functions with many bs_match_string instructions. The optimization is done in two passes. The first pass coalesces adjacent bs_match_string instructions. To avoid copying bitstrings multiple times, the bitstrings in the instructions are combined in to a (deep) list. The second pass goes through all instructions in the function and combines the list of bitstrings to a single bitstring in all bs_match_string instructions. The second pass (fix_bs_match_string) is run on all instructions in each function, even if there are no bs_match_instructions in the function. While fix_bs_match_string is not a bottleneck (it is a linear pass), its execution time is noticeable when profiling some modules. Move the execution of the second pass to the select_binary() function so that it will only be executed for instructions that do binary matching. Also take the opportunity to optimize away uses of bs_restore2 that occour directly after a bs_save2. That optimimization is currently done in beam_block, but it can be done essentially for free in the same pass that fixes up bs_match_string instructions.
Diffstat (limited to 'lib/compiler/src')
-rw-r--r--lib/compiler/src/beam_block.erl9
-rw-r--r--lib/compiler/src/v3_codegen.erl42
2 files changed, 24 insertions, 27 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 5216f39296..e2639e9cac 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -61,15 +61,6 @@ blockify(Is) ->
blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) ->
%% Useless instruction sequence.
blockify(Is, Acc);
-
-%% New bit syntax matching.
-blockify([{bs_save2,R,Point}=I,{bs_restore2,R,Point}|Is], Acc) ->
- blockify([I|Is], Acc);
-blockify([{bs_save2,R,Point}=I,{test,is_eq_exact,_,_}=Test,
- {bs_restore2,R,Point}|Is], Acc) ->
- blockify([I,Test|Is], Acc);
-
-%% Do other peep-hole optimizations.
blockify([{test,is_atom,{f,Fail},[Reg]}=I|
[{select,select_val,Reg,{f,Fail},
[{atom,false},{f,_}=BrFalse,
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index eb7926d3ab..15a54a5886 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -121,24 +121,15 @@ cg_fun(Les, Hvs, Vdb, AtomMod, NameArity, Anno, St0) ->
put_reg(V, Reg)
end, [], Hvs),
stk=[]}, 0, Vdb),
- {B0,_Aft,St} = cg_list(Les, 0, Vdb, Bef,
+ {B,_Aft,St} = cg_list(Les, 0, Vdb, Bef,
St3#cg{bfail=0,
ultimate_failure=UltimateMatchFail,
is_top_block=true}),
- B = fix_bs_match_strings(B0),
{Name,Arity} = NameArity,
Asm = [{label,Fi},line(Anno),{func_info,AtomMod,{atom,Name},Arity},
{label,Fl}|B++[{label,UltimateMatchFail},if_end]],
{Asm,Fl,St}.
-fix_bs_match_strings([{test,bs_match_string,F,[Ctx,BinList]}|Is])
- when is_list(BinList) ->
- I = {test,bs_match_string,F,[Ctx,list_to_bitstring(BinList)]},
- [I|fix_bs_match_strings(Is)];
-fix_bs_match_strings([I|Is]) ->
- [I|fix_bs_match_strings(Is)];
-fix_bs_match_strings([]) -> [].
-
%% cg(Lkexpr, Vdb, StackReg, State) -> {[Ainstr],StackReg,State}.
%% Generate code for a kexpr.
%% Split function into two steps for clarity, not efficiency.
@@ -717,22 +708,37 @@ select_nil(#l{ke={val_clause,nil,B}}, V, Tf, Vf, Bef, St0) ->
select_binary(#l{ke={val_clause,{binary,{var,V}},B},i=I,vdb=Vdb},
V, Tf, Vf, Bef, St0) ->
Int0 = clear_dead(Bef#sr{reg=Bef#sr.reg}, I, Vdb),
- {Bis,Aft,St1} = match_cg(B, Vf, Int0, St0),
+ {Bis0,Aft,St1} = match_cg(B, Vf, Int0, St0),
CtxReg = fetch_var(V, Int0),
Live = max_reg(Bef#sr.reg),
- {[{test,bs_start_match2,{f,Tf},Live,[CtxReg,V],CtxReg},
- {bs_save2,CtxReg,{V,V}}|Bis],
- Aft,St1};
+ Bis1 = [{test,bs_start_match2,{f,Tf},Live,[CtxReg,V],CtxReg},
+ {bs_save2,CtxReg,{V,V}}|Bis0],
+ Bis = finish_select_binary(Bis1),
+ {Bis,Aft,St1};
select_binary(#l{ke={val_clause,{binary,{var,Ivar}},B},i=I,vdb=Vdb},
V, Tf, Vf, Bef, St0) ->
Regs = put_reg(Ivar, Bef#sr.reg),
Int0 = clear_dead(Bef#sr{reg=Regs}, I, Vdb),
- {Bis,Aft,St1} = match_cg(B, Vf, Int0, St0),
+ {Bis0,Aft,St1} = match_cg(B, Vf, Int0, St0),
CtxReg = fetch_var(Ivar, Int0),
Live = max_reg(Bef#sr.reg),
- {[{test,bs_start_match2,{f,Tf},Live,[fetch_var(V, Bef),Ivar],CtxReg},
- {bs_save2,CtxReg,{Ivar,Ivar}}|Bis],
- Aft,St1}.
+ Bis1 = [{test,bs_start_match2,{f,Tf},Live,[fetch_var(V, Bef),Ivar],CtxReg},
+ {bs_save2,CtxReg,{Ivar,Ivar}}|Bis0],
+ Bis = finish_select_binary(Bis1),
+ {Bis,Aft,St1}.
+
+finish_select_binary([{bs_save2,R,Point}=I,{bs_restore2,R,Point}|Is]) ->
+ [I|finish_select_binary(Is)];
+finish_select_binary([{bs_save2,R,Point}=I,{test,is_eq_exact,_,_}=Test,
+ {bs_restore2,R,Point}|Is]) ->
+ [I,Test|finish_select_binary(Is)];
+finish_select_binary([{test,bs_match_string,F,[Ctx,BinList]}|Is])
+ when is_list(BinList) ->
+ I = {test,bs_match_string,F,[Ctx,list_to_bitstring(BinList)]},
+ [I|finish_select_binary(Is)];
+finish_select_binary([I|Is]) ->
+ [I|finish_select_binary(Is)];
+finish_select_binary([]) -> [].
%% New instructions for selection of binary segments.