aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler/src
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2012-08-16 14:36:55 +0200
committerBjörn Gustavsson <[email protected]>2012-10-09 15:24:38 +0200
commitb68297d347a9a041854410a77861982b1d0861d2 (patch)
treec9f6c9ef206f0bfcc7215ac3bd1e4e7265382391 /lib/compiler/src
parentb76588fb5a4057dce8c26307e497370a33217a44 (diff)
downloadotp-b68297d347a9a041854410a77861982b1d0861d2.tar.gz
otp-b68297d347a9a041854410a77861982b1d0861d2.tar.bz2
otp-b68297d347a9a041854410a77861982b1d0861d2.zip
Improve binary matching of literals
The bs_match_string instruction is used to speed up matching of binary literals. For example, given this source code: foo1(<<1,2,3>>) -> ok. The matching part of the code will look like: {test,bs_start_match2,{f,1},1,[{x,0},0],{x,0}}. {test,bs_match_string,{f,3},[{x,0},24,{string,[1,2,3]}]}. {test,bs_test_tail2,{f,3},[{x,0},0]}. Nice. However, if we do a simple change to the source code: foo2(<<1,2,3>>) -> ok; foo2(<<>>) -> error. the resulting matching code will look like (sligthly simplified): {test,bs_start_match2,{f,4},1,[{x,0},0],{x,0}}. {test,bs_get_integer2,{f,7},1,[{x,0},{integer,8},1,Flags],{x,1}}. {test,is_eq_exact,{f,8},[{x,1},{integer,1}]}. {test,bs_match_string,{f,6},[{x,0},16,{string,[2,3]}]}. {test,bs_test_tail2,{f,6},[{x,0},0]}. {move,{atom,ok},{x,0}}. return. {label,6}. {bs_restore2,{x,0},{atom,start}}. {label,7}. {test,bs_test_tail2,{f,8},[{x,0},0]}. That is, matching of the first byte is not combined into the bs_match_string instruction that follows. Fix this problem by allowing a bs_match_string instruction to be used if all clauses will match either the same integer literal or the empty binary.
Diffstat (limited to 'lib/compiler/src')
-rw-r--r--lib/compiler/src/beam_dead.erl2
-rw-r--r--lib/compiler/src/v3_kernel.erl41
2 files changed, 40 insertions, 3 deletions
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl
index 5f12a98f09..cf6272a2ff 100644
--- a/lib/compiler/src/beam_dead.erl
+++ b/lib/compiler/src/beam_dead.erl
@@ -527,6 +527,8 @@ count_bits_matched([{test,_,_,_,[_,Sz,U,{field_flags,_}],_}|Is], SavePoint, Bits
{integer,N} -> count_bits_matched(Is, SavePoint, Bits+N*U);
_ -> count_bits_matched(Is, SavePoint, Bits)
end;
+count_bits_matched([{test,bs_match_string,_,[_,Bits,_]}|Is], SavePoint, Bits0) ->
+ count_bits_matched(Is, SavePoint, Bits0+Bits);
count_bits_matched([{test,_,_,_}|Is], SavePoint, Bits) ->
count_bits_matched(Is, SavePoint, Bits);
count_bits_matched([{bs_save2,Reg,SavePoint}|_], {Reg,SavePoint}, Bits) ->
diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl
index b184987625..b69ad416db 100644
--- a/lib/compiler/src/v3_kernel.erl
+++ b/lib/compiler/src/v3_kernel.erl
@@ -81,7 +81,7 @@
-export([module/2,format_error/1]).
-import(lists, [map/2,foldl/3,foldr/3,mapfoldl/3,splitwith/2,member/2,
- keymember/3,keyfind/3]).
+ keymember/3,keyfind/3,partition/2]).
-import(ordsets, [add_element/2,del_element/2,union/2,union/1,subtract/2]).
-import(cerl, [c_tuple/1]).
@@ -1081,9 +1081,44 @@ select_bin_con(Cs0) ->
end, Cs0),
select_bin_con_1(Cs1).
+
select_bin_con_1(Cs) ->
try
- select_bin_int(Cs)
+ %% The usual way to match literals is to first extract the
+ %% value to a register, and then compare the register to the
+ %% literal value. Extracting the value is good if we need
+ %% compare it more than once.
+ %%
+ %% But we would like to combine the extracting and the
+ %% comparing into a single instruction if we know that
+ %% a binary segment must contain specific integer value
+ %% or the matching will fail, like in this example:
+ %%
+ %% <<42:8,...>> ->
+ %% <<42:8,...>> ->
+ %% .
+ %% .
+ %% .
+ %% <<42:8,...>> ->
+ %% <<>> ->
+ %%
+ %% The first segment must either contain the integer 42
+ %% or the binary must end for the match to succeed.
+ %%
+ %% The way we do is to replace the generic #k_bin_seg{}
+ %% record with a #k_bin_int{} record if all clauses will
+ %% select the same literal integer (except for one or more
+ %% clauses that will end the binary).
+
+ {BinSegs0,BinEnd} =
+ partition(fun (C) ->
+ clause_con(C) =:= k_bin_seg
+ end, Cs),
+ BinSegs = select_bin_int(BinSegs0),
+ case BinEnd of
+ [] -> BinSegs;
+ [_|_] -> BinSegs ++ [{k_bin_end,BinEnd}]
+ end
catch
throw:not_possible ->
select_bin_con_2(Cs)
@@ -1097,7 +1132,7 @@ select_bin_con_2([]) -> [].
%% select_bin_int([Clause]) -> {k_bin_int,[Clause]}
%% If the first pattern in each clause selects the same integer,
-%% rewrite all clauses to use #k_bin_int{} (which will later to
+%% rewrite all clauses to use #k_bin_int{} (which will later be
%% translated to a bs_match_string/4 instruction).
%%
%% If it is not possible to do this rewrite, a 'not_possible'