From 3ff0914b5625b8ef76e6ff5a2f2db1c563ebed3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Fri, 10 May 2019 14:05:40 +0200 Subject: Expand and squeeze literal integers/utf8 bin segments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds two operations when handling literal integers and literal utf8 segments in the v3_kernel pass. The first operation is to expand all literal integers with size more than 8 and literal utf8s into integers with size of 8 (and potentially an integer with size less than 8 at the end). This expansion simplifies the code in other operations inside v3_kernel and ensure they apply more consistently. For instance, literal binary matching now applies to both regular and utf8 strings. Furthermore, we can more efficiently group clauses. For instance, the following code: foo(<<$á/utf8, X/binary>>) -> foo(X); foo(<<$é/utf8, X/binary>>) -> foo(X); foo(<<>>) -> ok. Becomes a bs_get_integer_16 comparing 50089 and 50081, allowing us to skip the utf8 conversion at runtime. However, since expanding an integer of size 16 into two of size 8 can be less efficient when matching at runtime, later we do another pass, where we squeeze all of those integers together into an integer with maximum size of 24. This allows prefix matching, such as: foo(<<"aaaa", X/binary>>) -> foo(X); foo(<<"bbbb", X/binary>>) -> foo(X); foo(<<>>) -> ok. To run more than 2x faster (as long as all clauses match on a given prefix). Compilation times and binary size are roughly the same. --- lib/compiler/test/bs_match_SUITE.erl | 162 ++++++++++++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 2 deletions(-) (limited to 'lib/compiler/test') diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl index d97f49c56e..6c3b900d1f 100644 --- a/lib/compiler/test/bs_match_SUITE.erl +++ b/lib/compiler/test/bs_match_SUITE.erl @@ -24,7 +24,7 @@ -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2, init_per_testcase/2,end_per_testcase/2, - verify_highest_opcode/1, + verify_highest_opcode/1, expand_and_squeeze/1, size_shadow/1,int_float/1,otp_5269/1,null_fields/1,wiger/1, bin_tail/1,save_restore/1, partitioned_bs_match/1,function_clause/1, @@ -63,7 +63,7 @@ groups() -> [{p,[], [verify_highest_opcode, size_shadow,int_float,otp_5269,null_fields,wiger, - bin_tail,save_restore, + bin_tail,save_restore,expand_and_squeeze, partitioned_bs_match,function_clause,unit, shared_sub_bins,bin_and_float,dec_subidentifiers, skip_optional_tag,decode_integer,wfbm,degenerated_match,bs_sum, @@ -2006,3 +2006,161 @@ do_matching_meets_apply(_Bin, {Handler, State}) -> Handler:abs(State). id(I) -> I. + +expand_and_squeeze(Config) when is_list(Config) -> + %% UTF8 literals are expanded and then squeezed into integer16 + [ + {test,bs_get_integer2,_,_,[_,{integer,16}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<<$á/utf8,_/binary>>"), + ?Q("<<$é/utf8,_/binary>>") + ]), + + %% Sized integers are expanded and then squeezed into integer16 + [ + {test,bs_get_integer2,_,_,[_,{integer,16}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<<0:32,_/binary>>"), + ?Q("<<\"bbbb\",_/binary>>") + ]), + + %% Groups of 8 bits are squeezed into integer16 + [ + {test,bs_get_integer2,_,_,[_,{integer,16}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<<\"aaaa\",_/binary>>"), + ?Q("<<\"bbbb\",_/binary>>") + ]), + + %% Groups of 8 bits with empty binary are also squeezed + [ + {test,bs_get_integer2,_,_,[_,{integer,16}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<<\"aaaa\",_/binary>>"), + ?Q("<<\"bbbb\",_/binary>>"), + ?Q("<<>>") + ]), + + %% Groups of 8 bits with float lookup are not squeezed + [ + {test,bs_get_integer2,_,_,[_,{integer,8}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<<\"aaaa\",_/binary>>"), + ?Q("<<\"bbbb\",_/binary>>"), + ?Q("<<_/float>>") + ]), + + %% Groups of diverse bits go with minimum possible + [ + {test,bs_get_integer2,_,_,[_,{integer,8}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<<\"aa\",_/binary>>"), + ?Q("<<\"bb\",_/binary>>"), + ?Q("<<\"c\",_/binary>>") + ]), + + %% Groups of diverse bits go with minimum possible but are recursive... + [ + {test,bs_get_integer2,_,_,[_,{integer,8}|_],_} + | RestDiverse + ] = binary_match_to_asm([ + ?Q("<<\"aaa\",_/binary>>"), + ?Q("<<\"abb\",_/binary>>"), + ?Q("<<\"c\",_/binary>>") + ]), + + %% so we still perform a 16 bits lookup for the remaining + true = lists:any(fun({test,bs_get_integer2,_,_,[_,{integer,16}|_],_}) -> true; + (_) -> false end, RestDiverse), + + %% Large match is kept as is if there is a sized match later + [ + {test,bs_get_integer2,_,_,[_,{integer,64}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<<255,255,255,255,255,255,255,255>>"), + ?Q("<<_:64>>") + ]), + + %% Large match is kept as is with large matches before and after + [ + {test,bs_get_integer2,_,_,[_,{integer,32}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<>"), + ?Q("<<0:32>>"), + ?Q("<<_:32>>") + ]), + + %% Large match is kept as is with large matches before and after + [ + {test,bs_get_integer2,_,_,[_,{integer,32}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<>"), + ?Q("<<0,0,0,0>>"), + ?Q("<<_:32>>") + ]), + + %% Large match is kept as is with smaller but still large matches before and after + [ + {test,bs_get_integer2,_,_,[_,{integer,32}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<>"), + ?Q("<<0:64>>"), + ?Q("<<_:32>>") + ]), + + %% There is no squeezing for groups with more than 16 matches + [ + {test,bs_get_integer2,_,_,[_,{integer,8}|_],_} + | _ + ] = binary_match_to_asm([ + ?Q("<<\"aa\", _/binary>>"), + ?Q("<<\"bb\", _/binary>>"), + ?Q("<<\"cc\", _/binary>>"), + ?Q("<<\"dd\", _/binary>>"), + ?Q("<<\"ee\", _/binary>>"), + ?Q("<<\"ff\", _/binary>>"), + ?Q("<<\"gg\", _/binary>>"), + ?Q("<<\"hh\", _/binary>>"), + ?Q("<<\"ii\", _/binary>>"), + ?Q("<<\"jj\", _/binary>>"), + ?Q("<<\"kk\", _/binary>>"), + ?Q("<<\"ll\", _/binary>>"), + ?Q("<<\"mm\", _/binary>>"), + ?Q("<<\"nn\", _/binary>>"), + ?Q("<<\"oo\", _/binary>>"), + ?Q("<<\"pp\", _/binary>>") + ]), + + ok. + +binary_match_to_asm(Matches) -> + Clauses = [ + begin + Ann = element(2, Match), + {clause,Ann,[Match],[],[{integer,Ann,Return}]} + end || {Match,Return} <- lists:zip(Matches, lists:seq(1, length(Matches))) + ], + + Module = [ + {attribute,1,module,match_to_asm}, + {attribute,2,export,[{example,1}]}, + {function,3,example,1,Clauses} + ], + + {ok,match_to_asm,{match_to_asm,_Exports,_Attrs,Funs,_},_} = + compile:forms(Module, [return, to_asm]), + + [{function,example,1,2,AllInstructions}|_] = Funs, + [{label,_},{line,_},{func_info,_,_,_},{label,_},{'%',_}, + {test,bs_start_match3,_,_,_,_},{bs_get_position,_,_,_}|Instructions] = AllInstructions, + Instructions. -- cgit v1.2.3