diff options
-rw-r--r-- | lib/compiler/src/sys_core_bsm.erl | 203 | ||||
-rw-r--r-- | lib/compiler/test/bs_match_SUITE.erl | 19 |
2 files changed, 88 insertions, 134 deletions
diff --git a/lib/compiler/src/sys_core_bsm.erl b/lib/compiler/src/sys_core_bsm.erl index 37e071fafa..65580f79e3 100644 --- a/lib/compiler/src/sys_core_bsm.erl +++ b/lib/compiler/src/sys_core_bsm.erl @@ -24,7 +24,7 @@ -export([module/2,format_error/1]). -include("core_parse.hrl"). --import(lists, [member/2,nth/2,reverse/1,usort/1]). +-import(lists, [member/2,reverse/1,usort/1]). -spec module(cerl:c_module(), [compile:option()]) -> {'ok', cerl:c_module()}. @@ -59,13 +59,6 @@ format_error(bin_opt_alias) -> format_error(bin_partition) -> "INFO: matching non-variables after a previous clause matching a variable " "will prevent delayed sub binary optimization"; -format_error(bin_left_var_used_in_guard) -> - "INFO: a variable to the left of the binary pattern is used in a guard; " - "will prevent delayed sub binary optimization"; -format_error(bin_argument_order) -> - "INFO: matching anything else but a plain variable to the left of " - "binary pattern will prevent delayed sub binary optimization; " - "SUGGEST changing argument order"; format_error(bin_var_used) -> "INFO: using a matched out sub binary will prevent " "delayed sub binary optimization"; @@ -96,46 +89,41 @@ bsm_an(#c_case{arg=#c_values{es=Es}}=Case) -> bsm_an(Other) -> {ok,Other}. -bsm_an_1(Vs, #c_case{clauses=Cs}=Case) -> - case bsm_leftmost(Cs) of - none -> {ok,Case}; - Pos -> bsm_an_2(Vs, Cs, Case, Pos) - end. - -bsm_an_2(Vs, Cs, Case, Pos) -> - case bsm_nonempty(Cs, Pos) of - true -> bsm_an_3(Vs, Cs, Case, Pos); - false -> {ok,Case} +bsm_an_1(Vs0, #c_case{clauses=Cs0}=Case) -> + case bsm_leftmost(Cs0) of + none -> + {ok,Case}; + 1 -> + bsm_an_2(Vs0, Cs0, Case); + Pos -> + Vs = move_from_col(Pos, Vs0), + Cs = [C#c_clause{pats=move_from_col(Pos, Ps)} || + #c_clause{pats=Ps}=C <- Cs0], + bsm_an_2(Vs, Cs, Case) end. -bsm_an_3(Vs, Cs, Case, Pos) -> +bsm_an_2(Vs, Cs, Case) -> try - bsm_ensure_no_partition(Cs, Pos), - {ok,bsm_do_an(Vs, Pos, Cs, Case)} + bsm_ensure_no_partition(Cs), + {ok,bsm_do_an(Vs, Cs, Case)} catch - throw:{problem,Where,What} -> - {ok,Case,{Where,What}} + throw:{problem,Where,What} -> + {ok,Case,{Where,What}} end. -bsm_do_an(Vs0, Pos, Cs0, Case) -> - case nth(Pos, Vs0) of - #c_var{name=Vname}=V0 -> - Cs = bsm_do_an_var(Vname, Pos, Cs0, []), - V = bsm_annotate_for_reuse(V0), - Bef = lists:sublist(Vs0, Pos-1), - Aft = lists:nthtail(Pos, Vs0), - case Bef ++ [V|Aft] of - [_] -> - Case#c_case{arg=V,clauses=Cs}; - Vs -> - Case#c_case{arg=#c_values{es=Vs},clauses=Cs} - end; - _ -> - Case - end. +move_from_col(Pos, L) -> + {First,[Col|Rest]} = lists:split(Pos - 1, L), + [Col|First] ++ Rest. -bsm_do_an_var(V, S, [#c_clause{pats=Ps,guard=G,body=B0}=C0|Cs], Acc) -> - case nth(S, Ps) of +bsm_do_an([#c_var{name=Vname}=V0|Vs0], Cs0, Case) -> + Cs = bsm_do_an_var(Vname, Cs0), + V = bsm_annotate_for_reuse(V0), + Vs = core_lib:make_values([V|Vs0]), + Case#c_case{arg=Vs,clauses=Cs}; +bsm_do_an(_Vs, _Cs, Case) -> Case. + +bsm_do_an_var(V, [#c_clause{pats=[P|_],guard=G,body=B0}=C0|Cs]) -> + case P of #c_var{name=VarName} -> case core_lib:is_var_used(V, G) of true -> bsm_problem(C0, orig_bin_var_used_in_guard); @@ -148,23 +136,23 @@ bsm_do_an_var(V, S, [#c_clause{pats=Ps,guard=G,body=B0}=C0|Cs], Acc) -> B1 = bsm_maybe_ctx_to_binary(VarName, B0), B = bsm_maybe_ctx_to_binary(V, B1), C = C0#c_clause{body=B}, - bsm_do_an_var(V, S, Cs, [C|Acc]); - #c_alias{}=P -> + [C|bsm_do_an_var(V, Cs)]; + #c_alias{} -> case bsm_could_match_binary(P) of false -> - bsm_do_an_var(V, S, Cs, [C0|Acc]); + [C0|bsm_do_an_var(V, Cs)]; true -> bsm_problem(C0, bin_opt_alias) end; - P -> + _ -> case bsm_could_match_binary(P) andalso bsm_is_var_used(V, G, B0) of false -> - bsm_do_an_var(V, S, Cs, [C0|Acc]); + [C0|bsm_do_an_var(V, Cs)]; true -> bsm_problem(C0, bin_var_used) end end; -bsm_do_an_var(_, _, [], Acc) -> reverse(Acc). +bsm_do_an_var(_, []) -> []. bsm_annotate_for_reuse(#c_var{anno=Anno}=Var) -> Var#c_var{anno=[reuse_for_context|Anno]}. @@ -192,131 +180,82 @@ previous_ctx_to_binary(V, Core) -> end. %% bsm_leftmost(Cs) -> none | ArgumentNumber -%% Find the leftmost argument that does binary matching. Return -%% the number of the argument (1-N). +%% Find the leftmost argument that matches a nonempty binary. +%% Return either 'none' or the argument number (1-N). bsm_leftmost(Cs) -> bsm_leftmost_1(Cs, none). +bsm_leftmost_1([_|_], 1) -> + 1; bsm_leftmost_1([#c_clause{pats=Ps}|Cs], Pos) -> bsm_leftmost_2(Ps, Cs, 1, Pos); bsm_leftmost_1([], Pos) -> Pos. bsm_leftmost_2(_, Cs, Pos, Pos) -> bsm_leftmost_1(Cs, Pos); -bsm_leftmost_2([#c_binary{}|_], Cs, N, _) -> +bsm_leftmost_2([#c_binary{segments=[_|_]}|_], Cs, N, _) -> bsm_leftmost_1(Cs, N); bsm_leftmost_2([_|Ps], Cs, N, Pos) -> bsm_leftmost_2(Ps, Cs, N+1, Pos); bsm_leftmost_2([], Cs, _, Pos) -> bsm_leftmost_1(Cs, Pos). -%% bsm_nonempty(Cs, Pos) -> true|false -%% Check if at least one of the clauses matches a non-empty -%% binary in the given argument position. +%% bsm_ensure_no_partition(Cs) -> ok (exception if problem) +%% There must only be a single bs_start_match2 instruction if we +%% are to reuse the binary variable for the match context. +%% +%% To make sure that there is only a single bs_start_match2 +%% instruction, we will check for partitions such as: %% -bsm_nonempty([#c_clause{pats=Ps}|Cs], Pos) -> - case nth(Pos, Ps) of - #c_binary{segments=[_|_]} -> - true; - _ -> - bsm_nonempty(Cs, Pos) - end; -bsm_nonempty([], _ ) -> false. - -%% bsm_ensure_no_partition(Cs, Pos) -> ok (exception if problem) -%% We must make sure that matching is not partitioned between -%% variables like this: %% foo(<<...>>) -> ... %% foo(<Variable>) when ... -> ... -%% foo(<Any non-variable pattern>) -> -%% If there is such partition, we are not allowed to reuse the binary variable -%% for the match context. +%% foo(<Non-variable pattern>) -> %% -%% Also, arguments to the left of the argument that is matched -%% against a binary, are only allowed to be simple variables, not -%% used in guards. The reason is that we must know that the binary is -%% only matched in one place (i.e. there must be only one bs_start_match2 -%% instruction emitted). +%% If there is such partition, we reject the optimization. -bsm_ensure_no_partition(Cs, Pos) -> - bsm_ensure_no_partition_1(Cs, Pos, before). +bsm_ensure_no_partition(Cs) -> + bsm_ensure_no_partition_1(Cs, before). %% Loop through each clause. -bsm_ensure_no_partition_1([#c_clause{pats=Ps,guard=G}|Cs], Pos, State0) -> - State = bsm_ensure_no_partition_2(Ps, Pos, G, simple_vars, State0), +bsm_ensure_no_partition_1([#c_clause{pats=Ps,guard=G}|Cs], State0) -> + State = bsm_ensure_no_partition_2(Ps, G, State0), case State of 'after' -> - bsm_ensure_no_partition_after(Cs, Pos); + bsm_ensure_no_partition_after(Cs); _ -> ok end, - bsm_ensure_no_partition_1(Cs, Pos, State); -bsm_ensure_no_partition_1([], _, _) -> ok. + bsm_ensure_no_partition_1(Cs, State); +bsm_ensure_no_partition_1([], _) -> ok. -%% Loop through each pattern for this clause. -bsm_ensure_no_partition_2([#c_binary{}=Where|_], 1, _, Vstate, State) -> - case State of - before when Vstate =:= simple_vars -> within; - before -> bsm_problem(Where, Vstate); - within when Vstate =:= simple_vars -> within; - within -> bsm_problem(Where, Vstate) - end; -bsm_ensure_no_partition_2([#c_alias{}=Alias|_], 1, N, Vstate, State) -> +bsm_ensure_no_partition_2([#c_binary{}|_], _, _State) -> + within; +bsm_ensure_no_partition_2([#c_alias{}=Alias|_], N, State) -> %% Retrieve the real pattern that the alias refers to and check that. P = bsm_real_pattern(Alias), - bsm_ensure_no_partition_2([P], 1, N, Vstate, State); -bsm_ensure_no_partition_2([_|_], 1, _, _Vstate, before=State) -> + bsm_ensure_no_partition_2([P], N, State); +bsm_ensure_no_partition_2([_|_], _, before=State) -> %% No binary matching yet - therefore no partition. State; -bsm_ensure_no_partition_2([P|_], 1, _, Vstate, State) -> +bsm_ensure_no_partition_2([P|_], _, State) -> case bsm_could_match_binary(P) of false -> - %% If clauses can be freely arranged (Vstate =:= simple_vars), - %% a clause that cannot match a binary will not partition the clause. - %% Example: - %% - %% a(Var, <<>>) -> ... - %% a(Var, []) -> ... - %% a(Var, <<B>>) -> ... - %% - %% But if the clauses can't be freely rearranged, as in - %% - %% b(Var, <<X>>) -> ... - %% b(1, 2) -> ... - %% - %% we do have a problem. - %% - case Vstate of - simple_vars -> State; - _ -> bsm_problem(P, Vstate) - end; + State; true -> %% The pattern P *may* match a binary, so we must update the state. %% (P must be a variable.) - case State of - within -> 'after'; - 'after' -> 'after' - end - end; -bsm_ensure_no_partition_2([#c_var{name=V}|Ps], N, G, Vstate, S) -> - case core_lib:is_var_used(V, G) of - false -> - bsm_ensure_no_partition_2(Ps, N-1, G, Vstate, S); - true -> - bsm_ensure_no_partition_2(Ps, N-1, G, bin_left_var_used_in_guard, S) - end; -bsm_ensure_no_partition_2([_|Ps], N, G, _, S) -> - bsm_ensure_no_partition_2(Ps, N-1, G, bin_argument_order, S). + 'after' + end. -bsm_ensure_no_partition_after([#c_clause{pats=Ps}=C|Cs], Pos) -> - case nth(Pos, Ps) of - #c_var{} -> - bsm_ensure_no_partition_after(Cs, Pos); - _ -> - bsm_problem(C, bin_partition) +bsm_ensure_no_partition_after([#c_clause{pats=Ps}=C|Cs]) -> + case Ps of + [#c_var{}|_] -> + bsm_ensure_no_partition_after(Cs); + _ -> + bsm_problem(C, bin_partition) end; -bsm_ensure_no_partition_after([], _) -> ok. +bsm_ensure_no_partition_after([]) -> ok. bsm_could_match_binary(#c_alias{pat=P}) -> bsm_could_match_binary(P); bsm_could_match_binary(#c_cons{}) -> false; diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl index 7557d6d57b..4bd5e8e2e1 100644 --- a/lib/compiler/test/bs_match_SUITE.erl +++ b/lib/compiler/test/bs_match_SUITE.erl @@ -40,7 +40,7 @@ map_and_binary/1,unsafe_branch_caching/1, bad_literals/1,good_literals/1,constant_propagation/1, parse_xml/1,get_payload/1,escape/1,num_slots_different/1, - beam_bsm/1,guard/1,is_ascii/1]). + beam_bsm/1,guard/1,is_ascii/1,non_opt_eq/1]). -export([coverage_id/1,coverage_external_ignore/2]). @@ -73,7 +73,7 @@ groups() -> map_and_binary,unsafe_branch_caching, bad_literals,good_literals,constant_propagation,parse_xml, get_payload,escape,num_slots_different, - beam_bsm,guard,is_ascii]}]. + beam_bsm,guard,is_ascii,non_opt_eq]}]. init_per_suite(Config) -> @@ -1654,6 +1654,21 @@ do_is_ascii(<<C,_/binary>>) when C >= 16#80 -> do_is_ascii(<<_, T/binary>>) -> do_is_ascii(T). +non_opt_eq(_Config) -> + true = non_opt_eq([], <<>>), + true = non_opt_eq([$a], <<$a>>), + false = non_opt_eq([$a], <<$b>>), + ok. + +%% An example from the Efficiency Guide. It used to be not optimized, +%% but now it can be optimized. + +non_opt_eq([H|T1], <<H,T2/binary>>) -> + non_opt_eq(T1, T2); +non_opt_eq([_|_], <<_,_/binary>>) -> + false; +non_opt_eq([], <<>>) -> + true. check(F, R) -> R = F(). |