diff options
author | Björn Gustavsson <[email protected]> | 2012-08-16 14:36:55 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2012-10-09 15:24:38 +0200 |
commit | b68297d347a9a041854410a77861982b1d0861d2 (patch) | |
tree | c9f6c9ef206f0bfcc7215ac3bd1e4e7265382391 | |
parent | b76588fb5a4057dce8c26307e497370a33217a44 (diff) | |
download | otp-b68297d347a9a041854410a77861982b1d0861d2.tar.gz otp-b68297d347a9a041854410a77861982b1d0861d2.tar.bz2 otp-b68297d347a9a041854410a77861982b1d0861d2.zip |
Improve binary matching of literals
The bs_match_string instruction is used to speed up matching of
binary literals. For example, given this source code:
foo1(<<1,2,3>>) -> ok.
The matching part of the code will look like:
{test,bs_start_match2,{f,1},1,[{x,0},0],{x,0}}.
{test,bs_match_string,{f,3},[{x,0},24,{string,[1,2,3]}]}.
{test,bs_test_tail2,{f,3},[{x,0},0]}.
Nice. However, if we do a simple change to the source code:
foo2(<<1,2,3>>) -> ok;
foo2(<<>>) -> error.
the resulting matching code will look like (sligthly simplified):
{test,bs_start_match2,{f,4},1,[{x,0},0],{x,0}}.
{test,bs_get_integer2,{f,7},1,[{x,0},{integer,8},1,Flags],{x,1}}.
{test,is_eq_exact,{f,8},[{x,1},{integer,1}]}.
{test,bs_match_string,{f,6},[{x,0},16,{string,[2,3]}]}.
{test,bs_test_tail2,{f,6},[{x,0},0]}.
{move,{atom,ok},{x,0}}.
return.
{label,6}.
{bs_restore2,{x,0},{atom,start}}.
{label,7}.
{test,bs_test_tail2,{f,8},[{x,0},0]}.
That is, matching of the first byte is not combined into the
bs_match_string instruction that follows.
Fix this problem by allowing a bs_match_string instruction to be
used if all clauses will match either the same integer literal or
the empty binary.
-rw-r--r-- | lib/compiler/src/beam_dead.erl | 2 | ||||
-rw-r--r-- | lib/compiler/src/v3_kernel.erl | 41 | ||||
-rw-r--r-- | lib/compiler/test/compilation_SUITE.erl | 2 |
3 files changed, 41 insertions, 4 deletions
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl index 5f12a98f09..cf6272a2ff 100644 --- a/lib/compiler/src/beam_dead.erl +++ b/lib/compiler/src/beam_dead.erl @@ -527,6 +527,8 @@ count_bits_matched([{test,_,_,_,[_,Sz,U,{field_flags,_}],_}|Is], SavePoint, Bits {integer,N} -> count_bits_matched(Is, SavePoint, Bits+N*U); _ -> count_bits_matched(Is, SavePoint, Bits) end; +count_bits_matched([{test,bs_match_string,_,[_,Bits,_]}|Is], SavePoint, Bits0) -> + count_bits_matched(Is, SavePoint, Bits0+Bits); count_bits_matched([{test,_,_,_}|Is], SavePoint, Bits) -> count_bits_matched(Is, SavePoint, Bits); count_bits_matched([{bs_save2,Reg,SavePoint}|_], {Reg,SavePoint}, Bits) -> diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl index b184987625..b69ad416db 100644 --- a/lib/compiler/src/v3_kernel.erl +++ b/lib/compiler/src/v3_kernel.erl @@ -81,7 +81,7 @@ -export([module/2,format_error/1]). -import(lists, [map/2,foldl/3,foldr/3,mapfoldl/3,splitwith/2,member/2, - keymember/3,keyfind/3]). + keymember/3,keyfind/3,partition/2]). -import(ordsets, [add_element/2,del_element/2,union/2,union/1,subtract/2]). -import(cerl, [c_tuple/1]). @@ -1081,9 +1081,44 @@ select_bin_con(Cs0) -> end, Cs0), select_bin_con_1(Cs1). + select_bin_con_1(Cs) -> try - select_bin_int(Cs) + %% The usual way to match literals is to first extract the + %% value to a register, and then compare the register to the + %% literal value. Extracting the value is good if we need + %% compare it more than once. + %% + %% But we would like to combine the extracting and the + %% comparing into a single instruction if we know that + %% a binary segment must contain specific integer value + %% or the matching will fail, like in this example: + %% + %% <<42:8,...>> -> + %% <<42:8,...>> -> + %% . + %% . + %% . + %% <<42:8,...>> -> + %% <<>> -> + %% + %% The first segment must either contain the integer 42 + %% or the binary must end for the match to succeed. + %% + %% The way we do is to replace the generic #k_bin_seg{} + %% record with a #k_bin_int{} record if all clauses will + %% select the same literal integer (except for one or more + %% clauses that will end the binary). + + {BinSegs0,BinEnd} = + partition(fun (C) -> + clause_con(C) =:= k_bin_seg + end, Cs), + BinSegs = select_bin_int(BinSegs0), + case BinEnd of + [] -> BinSegs; + [_|_] -> BinSegs ++ [{k_bin_end,BinEnd}] + end catch throw:not_possible -> select_bin_con_2(Cs) @@ -1097,7 +1132,7 @@ select_bin_con_2([]) -> []. %% select_bin_int([Clause]) -> {k_bin_int,[Clause]} %% If the first pattern in each clause selects the same integer, -%% rewrite all clauses to use #k_bin_int{} (which will later to +%% rewrite all clauses to use #k_bin_int{} (which will later be %% translated to a bs_match_string/4 instruction). %% %% If it is not possible to do this rewrite, a 'not_possible' diff --git a/lib/compiler/test/compilation_SUITE.erl b/lib/compiler/test/compilation_SUITE.erl index fed7bec7d4..34f105b5fc 100644 --- a/lib/compiler/test/compilation_SUITE.erl +++ b/lib/compiler/test/compilation_SUITE.erl @@ -623,7 +623,7 @@ string_table(Config) when is_list(Config) -> ?line File = filename:join(DataDir, "string_table.erl"), ?line {ok,string_table,Beam,[]} = compile:file(File, [return, binary]), ?line {ok,{string_table,[StringTableChunk]}} = beam_lib:chunks(Beam, ["StrT"]), - ?line {"StrT", <<"stringabletringtable">>} = StringTableChunk, + ?line {"StrT", <<"stringtable">>} = StringTableChunk, ok. otp_8949_a(Config) when is_list(Config) -> |