aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2012-08-17 12:11:47 +0200
committerBjörn Gustavsson <[email protected]>2012-10-09 15:24:38 +0200
commitb76588fb5a4057dce8c26307e497370a33217a44 (patch)
treec51b3b493b466c2308626a844606a5ea63b22d80
parent34afddc135ff25cefa75aee70b8d09186f78085e (diff)
downloadotp-b76588fb5a4057dce8c26307e497370a33217a44.tar.gz
otp-b76588fb5a4057dce8c26307e497370a33217a44.tar.bz2
otp-b76588fb5a4057dce8c26307e497370a33217a44.zip
v3_codegen: Combine adjacent bs_match_string instructions
In modules with huge functions with many bs_match_string instructions, we can speed up the compilation by combining adjacent bs_match_strings instruction in v3_codegen (as opposed to in beam_block where we used to do it). For instance, on my computer the v3_codegen became more than twice as fast when compiling the re_testoutput1_split_test module in the STDLIB test suites.
-rw-r--r--lib/compiler/src/beam_block.erl4
-rw-r--r--lib/compiler/src/v3_codegen.erl28
2 files changed, 26 insertions, 6 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index cd568097fa..109bb1ec0e 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -615,10 +615,6 @@ bsm_opt_2([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,_]}|Is],
[{test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,Flags]}|Acc]) ->
bsm_opt_2(Is, [{test,bs_skip_bits2,F,
[Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]);
-bsm_opt_2([{test,bs_match_string,F,[Ctx,Bin1]},
- {test,bs_match_string,F,[Ctx,Bin2]}|Is], Acc) ->
- I = {test,bs_match_string,F,[Ctx,<<Bin1/bitstring,Bin2/bitstring>>]},
- bsm_opt_2([I|Is], Acc);
bsm_opt_2([I|Is], Acc) ->
bsm_opt_2(Is, [I|Acc]);
bsm_opt_2([], Acc) -> reverse(Acc).
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index 84a1e185ea..3b73269545 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -123,15 +123,24 @@ cg_fun(Les, Hvs, Vdb, AtomMod, NameArity, Anno, St0) ->
put_reg(V, Reg)
end, [], Hvs),
stk=[]}, 0, Vdb),
- {B,_Aft,St} = cg_list(Les, 0, Vdb, Bef,
+ {B0,_Aft,St} = cg_list(Les, 0, Vdb, Bef,
St3#cg{bfail=0,
ultimate_failure=UltimateMatchFail,
is_top_block=true}),
+ B = fix_bs_match_strings(B0),
{Name,Arity} = NameArity,
Asm = [{label,Fi},line(Anno),{func_info,AtomMod,{atom,Name},Arity},
{label,Fl}|B++[{label,UltimateMatchFail},if_end]],
{Asm,Fl,St}.
+fix_bs_match_strings([{test,bs_match_string,F,[Ctx,BinList]}|Is])
+ when is_list(BinList) ->
+ I = {test,bs_match_string,F,[Ctx,list_to_bitstring(BinList)]},
+ [I|fix_bs_match_strings(Is)];
+fix_bs_match_strings([I|Is]) ->
+ [I|fix_bs_match_strings(Is)];
+fix_bs_match_strings([]) -> [].
+
%% cg(Lkexpr, Vdb, StackReg, State) -> {[Ainstr],StackReg,State}.
%% Generate code for a kexpr.
%% Split function into two steps for clarity, not efficiency.
@@ -713,7 +722,22 @@ select_bin_seg(#l{ke={val_clause,{bin_int,Ctx,Sz,U,Fs,Val,Es},B},i=I,vdb=Vdb},
I, Vdb, Bef, Ctx, St0),
{Bis,Aft,St2} = match_cg(B, Fail, Int, St1),
CtxReg = fetch_var(Ctx, Bef),
- {[{bs_restore2,CtxReg,{Ctx,Ivar}}|Mis] ++ Bis,Aft,St2}.
+ Is = case Mis ++ Bis of
+ [{test,bs_match_string,F,[OtherCtx,Bin1]},
+ {bs_save2,OtherCtx,_},
+ {bs_restore2,OtherCtx,_},
+ {test,bs_match_string,F,[OtherCtx,Bin2]}|Is0] ->
+ %% We used to do this optimization later, but it
+ %% turns out that in huge functions with many
+ %% bs_match_string instructions, it's a big win
+ %% to do the combination now. To avoid copying the
+ %% binary data again and again, we'll combine bitstrings
+ %% in a list and convert all of it to a bitstring later.
+ [{test,bs_match_string,F,[OtherCtx,[Bin1,Bin2]]}|Is0];
+ Is0 ->
+ Is0
+ end,
+ {[{bs_restore2,CtxReg,{Ctx,Ivar}}|Is],Aft,St2}.
select_extract_int([{var,Tl}], Val, {integer,Sz}, U, Fs, Vf,
I, Vdb, Bef, Ctx, St) ->