diff options
author | Björn Gustavsson <[email protected]> | 2012-08-17 12:11:47 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2012-10-09 15:24:38 +0200 |
commit | b76588fb5a4057dce8c26307e497370a33217a44 (patch) | |
tree | c51b3b493b466c2308626a844606a5ea63b22d80 | |
parent | 34afddc135ff25cefa75aee70b8d09186f78085e (diff) | |
download | otp-b76588fb5a4057dce8c26307e497370a33217a44.tar.gz otp-b76588fb5a4057dce8c26307e497370a33217a44.tar.bz2 otp-b76588fb5a4057dce8c26307e497370a33217a44.zip |
v3_codegen: Combine adjacent bs_match_string instructions
In modules with huge functions with many bs_match_string
instructions, we can speed up the compilation by combining
adjacent bs_match_strings instruction in v3_codegen (as opposed
to in beam_block where we used to do it).
For instance, on my computer the v3_codegen became more than
twice as fast when compiling the re_testoutput1_split_test module
in the STDLIB test suites.
-rw-r--r-- | lib/compiler/src/beam_block.erl | 4 | ||||
-rw-r--r-- | lib/compiler/src/v3_codegen.erl | 28 |
2 files changed, 26 insertions, 6 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index cd568097fa..109bb1ec0e 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -615,10 +615,6 @@ bsm_opt_2([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,_]}|Is], [{test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,Flags]}|Acc]) -> bsm_opt_2(Is, [{test,bs_skip_bits2,F, [Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]); -bsm_opt_2([{test,bs_match_string,F,[Ctx,Bin1]}, - {test,bs_match_string,F,[Ctx,Bin2]}|Is], Acc) -> - I = {test,bs_match_string,F,[Ctx,<<Bin1/bitstring,Bin2/bitstring>>]}, - bsm_opt_2([I|Is], Acc); bsm_opt_2([I|Is], Acc) -> bsm_opt_2(Is, [I|Acc]); bsm_opt_2([], Acc) -> reverse(Acc). diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl index 84a1e185ea..3b73269545 100644 --- a/lib/compiler/src/v3_codegen.erl +++ b/lib/compiler/src/v3_codegen.erl @@ -123,15 +123,24 @@ cg_fun(Les, Hvs, Vdb, AtomMod, NameArity, Anno, St0) -> put_reg(V, Reg) end, [], Hvs), stk=[]}, 0, Vdb), - {B,_Aft,St} = cg_list(Les, 0, Vdb, Bef, + {B0,_Aft,St} = cg_list(Les, 0, Vdb, Bef, St3#cg{bfail=0, ultimate_failure=UltimateMatchFail, is_top_block=true}), + B = fix_bs_match_strings(B0), {Name,Arity} = NameArity, Asm = [{label,Fi},line(Anno),{func_info,AtomMod,{atom,Name},Arity}, {label,Fl}|B++[{label,UltimateMatchFail},if_end]], {Asm,Fl,St}. +fix_bs_match_strings([{test,bs_match_string,F,[Ctx,BinList]}|Is]) + when is_list(BinList) -> + I = {test,bs_match_string,F,[Ctx,list_to_bitstring(BinList)]}, + [I|fix_bs_match_strings(Is)]; +fix_bs_match_strings([I|Is]) -> + [I|fix_bs_match_strings(Is)]; +fix_bs_match_strings([]) -> []. + %% cg(Lkexpr, Vdb, StackReg, State) -> {[Ainstr],StackReg,State}. %% Generate code for a kexpr. %% Split function into two steps for clarity, not efficiency. @@ -713,7 +722,22 @@ select_bin_seg(#l{ke={val_clause,{bin_int,Ctx,Sz,U,Fs,Val,Es},B},i=I,vdb=Vdb}, I, Vdb, Bef, Ctx, St0), {Bis,Aft,St2} = match_cg(B, Fail, Int, St1), CtxReg = fetch_var(Ctx, Bef), - {[{bs_restore2,CtxReg,{Ctx,Ivar}}|Mis] ++ Bis,Aft,St2}. + Is = case Mis ++ Bis of + [{test,bs_match_string,F,[OtherCtx,Bin1]}, + {bs_save2,OtherCtx,_}, + {bs_restore2,OtherCtx,_}, + {test,bs_match_string,F,[OtherCtx,Bin2]}|Is0] -> + %% We used to do this optimization later, but it + %% turns out that in huge functions with many + %% bs_match_string instructions, it's a big win + %% to do the combination now. To avoid copying the + %% binary data again and again, we'll combine bitstrings + %% in a list and convert all of it to a bitstring later. + [{test,bs_match_string,F,[OtherCtx,[Bin1,Bin2]]}|Is0]; + Is0 -> + Is0 + end, + {[{bs_restore2,CtxReg,{Ctx,Ivar}}|Is],Aft,St2}. select_extract_int([{var,Tl}], Val, {integer,Sz}, U, Fs, Vf, I, Vdb, Bef, Ctx, St) -> |