From b68297d347a9a041854410a77861982b1d0861d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 16 Aug 2012 14:36:55 +0200 Subject: Improve binary matching of literals The bs_match_string instruction is used to speed up matching of binary literals. For example, given this source code: foo1(<<1,2,3>>) -> ok. The matching part of the code will look like: {test,bs_start_match2,{f,1},1,[{x,0},0],{x,0}}. {test,bs_match_string,{f,3},[{x,0},24,{string,[1,2,3]}]}. {test,bs_test_tail2,{f,3},[{x,0},0]}. Nice. However, if we do a simple change to the source code: foo2(<<1,2,3>>) -> ok; foo2(<<>>) -> error. the resulting matching code will look like (slightly simplified): {test,bs_start_match2,{f,4},1,[{x,0},0],{x,0}}. {test,bs_get_integer2,{f,7},1,[{x,0},{integer,8},1,Flags],{x,1}}. {test,is_eq_exact,{f,8},[{x,1},{integer,1}]}. {test,bs_match_string,{f,6},[{x,0},16,{string,[2,3]}]}. {test,bs_test_tail2,{f,6},[{x,0},0]}. {move,{atom,ok},{x,0}}. return. {label,6}. {bs_restore2,{x,0},{atom,start}}. {label,7}. {test,bs_test_tail2,{f,8},[{x,0},0]}. That is, matching of the first byte is not combined into the bs_match_string instruction that follows. Fix this problem by allowing a bs_match_string instruction to be used if all clauses will match either the same integer literal or the empty binary. --- lib/compiler/src/v3_kernel.erl | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) (limited to 'lib/compiler/src/v3_kernel.erl') diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl index b184987625..b69ad416db 100644 --- a/lib/compiler/src/v3_kernel.erl +++ b/lib/compiler/src/v3_kernel.erl @@ -81,7 +81,7 @@ -export([module/2,format_error/1]). -import(lists, [map/2,foldl/3,foldr/3,mapfoldl/3,splitwith/2,member/2, - keymember/3,keyfind/3]). + keymember/3,keyfind/3,partition/2]). -import(ordsets, [add_element/2,del_element/2,union/2,union/1,subtract/2]). -import(cerl, [c_tuple/1]). 
@@ -1081,9 +1081,44 @@ select_bin_con(Cs0) -> end, Cs0), select_bin_con_1(Cs1). + select_bin_con_1(Cs) -> try - select_bin_int(Cs) + %% The usual way to match literals is to first extract the + %% value to a register, and then compare the register to the + %% literal value. Extracting the value is good if we need + %% to compare it more than once. + %% + %% But we would like to combine the extracting and the + %% comparing into a single instruction if we know that + %% a binary segment must contain a specific integer value + %% or the matching will fail, like in this example: + %% + %% <<42:8,...>> -> + %% <<42:8,...>> -> + %% . + %% . + %% . + %% <<42:8,...>> -> + %% <<>> -> + %% + %% The first segment must either contain the integer 42 + %% or the binary must end for the match to succeed. + %% + %% The way we do it is to replace the generic #k_bin_seg{} + %% record with a #k_bin_int{} record if all clauses will + %% select the same literal integer (except for one or more + %% clauses that will end the binary). + + {BinSegs0,BinEnd} = + partition(fun (C) -> + clause_con(C) =:= k_bin_seg + end, Cs), + BinSegs = select_bin_int(BinSegs0), + case BinEnd of + [] -> BinSegs; + [_|_] -> BinSegs ++ [{k_bin_end,BinEnd}] + end catch throw:not_possible -> select_bin_con_2(Cs) @@ -1097,7 +1132,7 @@ select_bin_con_2([]) -> []. %% select_bin_int([Clause]) -> {k_bin_int,[Clause]} %% If the first pattern in each clause selects the same integer, -%% rewrite all clauses to use #k_bin_int{} (which will later to +%% rewrite all clauses to use #k_bin_int{} (which will later be %% translated to a bs_match_string/4 instruction). %% %% If it is not possible to do this rewrite, a 'not_possible' -- cgit v1.2.3