aboutsummaryrefslogblamecommitdiffstats
path: root/lib/compiler/src/beam_ssa_bsm.erl
blob: 1ac9e6a3bbac24b0f43c41ac2a5a98cbf2304559 (plain) (tree)













































































































































































































































































































                                                                               

                                                   













































                                                                              
                                                  





                                                                   




                                                                  




































                                                                           



                                                                           





                                                         





                                                             































                                                                              
                                                           






                                                                   
                                               






































                                                                               
                                                                




























































































































































































                                                                                  



                                                                             
                                                
                                                                  




























































                                                                            



                                                      
































































































































                                                                                  

                                                                             


































































































































































                                                                                
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%

%%%
%%% This pass optimizes bit syntax matching, and is centered around the concept
%%% of "match context reuse" which is best explained through example. To put it
%%% shortly we attempt to turn this:
%%%
%%%    <<0,B/bits>> = A,
%%%    <<1,C/bits>> = B,
%%%    <<D,_/bits>> = C,
%%%    D.
%%%
%%% ... Into this:
%%%
%%%    <<0,1,D,_/bits>>=A,
%%%    D.
%%%
%%% Which is much faster as it avoids the creation of intermediate terms. This
%%% is especially noticeable in loops where such garbage is generated on each
%%% iteration.
%%%
%%% The optimization itself is very simple and can be applied whenever there's
%%% matching on the tail end of a binary; instead of creating a new binary and
%%% starting a new match context on it, we reuse the match context used to
%%% extract the tail and avoid the creation of both objects.
%%%
%%% The catch is that a match context isn't a proper type and nothing outside
%%% of bit syntax match operations can handle them. We therefore need to make
%%% sure that they never "leak" into other instructions, and most of the pass
%%% revolves around getting around that limitation.
%%%
%%% Unlike most other passes we look at the whole module so we can combine
%%% matches across function boundaries, greatly increasing the performance of
%%% complex matches and loops.
%%%

-module(beam_ssa_bsm).

-export([module/2, format_error/1]).

-include("beam_ssa.hrl").

-import(lists, [member/2, reverse/1, splitwith/2, map/2, foldl/3, mapfoldl/3,
                nth/2, max/1, unzip/1]).

-spec format_error(term()) -> nonempty_string().

format_error(OptInfo) ->
    format_opt_info(OptInfo).

-spec module(Module, Options) -> Result when
      Module :: beam_ssa:b_module(),
      Options :: [compile:option()],
      Result :: {ok, beam_ssa:b_module(), list()}.

-define(PASS(N), {N,fun N/1}).

module(#b_module{body=Fs0}=Module, Opts) ->
    ModInfo = analyze_module(Module),

    %% combine_matches is repeated after accept_context_args as the control
    %% flow changes can enable further optimizations, as in the example below:
    %%
    %%    a(<<0,X/binary>>) -> a(X);
    %%    a(A) when bit_size(A) =:= 52 -> bar;
    %%    a(<<1,X/binary>>) -> X. %% Match context will be reused here when
    %%                            %% when repeated.

    {Fs, _} = compile:run_sub_passes(
                [?PASS(combine_matches),
                 ?PASS(accept_context_args),
                 ?PASS(combine_matches),
                 ?PASS(allow_context_passthrough),
                 ?PASS(skip_outgoing_tail_extraction),
                 ?PASS(annotate_context_parameters)],
                {Fs0, ModInfo}),

    Ws = case proplists:get_bool(bin_opt_info, Opts) of
             true -> collect_opt_info(Fs);
             false -> []
         end,

    {ok, Module#b_module{body=Fs}, Ws}.

-type module_info() :: #{ func_id() => func_info() }.

-type func_id() :: {Name :: atom(), Arity :: non_neg_integer()}.

-type func_info() :: #{ has_bsm_ops => boolean(),
                        parameters => [#b_var{}],
                        parameter_info => #{ #b_var{} => param_info() } }.

-type param_info() :: suitable_for_reuse |
                      {Problem :: atom(), Where :: term()}.

-spec analyze_module(#b_module{}) -> module_info().

analyze_module(#b_module{body=Fs}) ->
    foldl(fun(#b_function{args=Parameters}=F, I) ->
                  FuncInfo = #{ has_bsm_ops => has_bsm_ops(F),
                                parameters => Parameters,
                                parameter_info => #{} },
                  FuncId = get_fa(F),
                  I#{ FuncId => FuncInfo }
          end, #{}, Fs).

has_bsm_ops(#b_function{bs=Blocks}) ->
    hbo_blocks(maps:to_list(Blocks)).

hbo_blocks([{_,#b_blk{is=Is}} | Blocks]) ->
    case hbo_is(Is) of
        false -> hbo_blocks(Blocks);
        true -> true
    end;
hbo_blocks([]) ->
    false.

hbo_is([#b_set{op=bs_start_match} | _]) -> true;
hbo_is([_I | Is]) -> hbo_is(Is);
hbo_is([]) -> false.

%% Checks whether it's legal to make a call with the given argument as a match
%% context, returning the param_info() of the relevant parameter.
-spec check_context_call(#b_set{}, Arg, CtxChain, ModInfo) -> param_info() when
      Arg :: #b_var{},
      CtxChain :: [#b_var{}],
      ModInfo :: module_info().
check_context_call(#b_set{args=Args}, Arg, CtxChain, ModInfo) ->
    Aliases = [Arg | CtxChain],
    ccc_1(Args, Arg, Aliases, ModInfo).

ccc_1([#b_local{}=Call | Args], Ctx, Aliases, ModInfo) ->
    %% Matching operations assume that their context isn't aliased (as in
    %% pointer aliasing), so we must reject calls whose arguments contain more
    %% than one reference to the context.
    %%
    %% TODO: Try to fall back to passing binaries in these cases. Partial reuse
    %% is better than nothing.
    UseCount = foldl(fun(Arg, C) ->
                             case member(Arg, Aliases) of
                                 true -> C + 1;
                                 false -> C
                             end
                     end, 0, Args),
    if
        UseCount =:= 1 ->
            #b_local{name=#b_literal{val=Name},arity=Arity} = Call,
            Callee = {Name, Arity},

            ParamInfo = funcinfo_get(Callee, parameter_info, ModInfo),
            Parameters = funcinfo_get(Callee, parameters, ModInfo),
            Parameter = nth(1 + arg_index(Ctx, Args), Parameters),

            case maps:find(Parameter, ParamInfo) of
                {ok, suitable_for_reuse} ->
                    suitable_for_reuse;
                {ok, Other} ->
                    {unsuitable_call, {Call, Other}};
                error ->
                    {no_match_on_entry, Call}
            end;
        UseCount > 1 ->
            {multiple_uses_in_call, Call}
    end;
ccc_1([#b_remote{}=Call | _Args], _Ctx, _CtxChain, _ModInfo) ->
    {remote_call, Call};
ccc_1([Fun | _Args], _Ctx, _CtxChain, _ModInfo) ->
    %% TODO: It may be possible to support this in the future for locally
    %% defined funs, including ones with free variables.
    {fun_call, Fun}.

%% Returns the index of Var in Args.
arg_index(Var, Args) -> arg_index_1(Var, Args, 0).

arg_index_1(Var, [Var | _Args], Index) -> Index;
arg_index_1(Var, [_Arg | Args], Index) -> arg_index_1(Var, Args, Index + 1).

is_tail_binary(#b_set{op=bs_match,args=[#b_literal{val=binary} | Rest]}) ->
    member(#b_literal{val=all}, Rest);
is_tail_binary(#b_set{op=bs_get_tail}) ->
    true;
is_tail_binary(_) ->
    false.

is_tail_binary(#b_var{}=Var, Defs) ->
    case find_match_definition(Var, Defs) of
        {ok, Def} -> is_tail_binary(Def);
        _ -> false
    end;
is_tail_binary(_Literal, _Defs) ->
    false.

assert_match_context(#b_var{}=Var, Defs) ->
    case maps:find(Var, Defs) of
        {ok, #b_set{op=bs_match,args=[_,#b_var{}=Ctx|_]}} ->
            assert_match_context(Ctx, Defs);
        {ok, #b_set{op=bs_start_match}} ->
            ok
    end.

find_match_definition(#b_var{}=Var, Defs) ->
    case maps:find(Var, Defs) of
        {ok, #b_set{op=bs_extract,args=[Ctx]}} -> maps:find(Ctx, Defs);
        {ok, #b_set{op=bs_get_tail}=Def} -> {ok, Def};
        _ -> error
    end.

%% Returns a list of all contexts that were used to extract Var.
context_chain_of(#b_var{}=Var, Defs) ->
    case maps:find(Var, Defs) of
        {ok, #b_set{op=bs_match,args=[_,#b_var{}=Ctx|_]}} ->
            [Ctx | context_chain_of(Ctx, Defs)];
        {ok, #b_set{op=bs_get_tail,args=[Ctx]}} ->
            [Ctx | context_chain_of(Ctx, Defs)];
        {ok, #b_set{op=bs_extract,args=[Ctx]}} ->
            [Ctx | context_chain_of(Ctx, Defs)];
        _ ->
            []
    end.

%% Grabs the match context used to produce the given variable.
match_context_of(#b_var{}=Var, Defs) ->
    Ctx = match_context_of_1(Var, Defs),
    assert_match_context(Ctx, Defs),
    Ctx.

match_context_of_1(Var, Defs) ->
    case maps:get(Var, Defs) of
        #b_set{op=bs_extract,args=[#b_var{}=Ctx0]} ->
            #b_set{op=bs_match,
                   args=[_,#b_var{}=Ctx|_]} = maps:get(Ctx0, Defs),
            Ctx;
        #b_set{op=bs_get_tail,args=[#b_var{}=Ctx]} ->
            Ctx
    end.

funcinfo_get(#b_function{}=F, Attribute, ModInfo) ->
    funcinfo_get(get_fa(F), Attribute, ModInfo);
funcinfo_get({_,_}=Key, Attribute, ModInfo) ->
    FuncInfo = maps:get(Key, ModInfo),
    maps:get(Attribute, FuncInfo).

funcinfo_set(#b_function{}=F, Attribute, Value, ModInfo) ->
    funcinfo_set(get_fa(F), Attribute, Value, ModInfo);
funcinfo_set(Key, Attribute, Value, ModInfo) ->
    FuncInfo = maps:put(Attribute, Value, maps:get(Key, ModInfo, #{})),
    maps:put(Key, FuncInfo, ModInfo).

get_fa(#b_function{ anno = Anno }) ->
    {_,Name,Arity} = maps:get(func_info, Anno),
    {Name,Arity}.

%% Replaces matched-out binaries with aliases that are lazily converted to
%% binary form when used, allowing us to keep the "match path" free of binary
%% creation.

-spec alias_matched_binaries(Blocks, Counter, AliasMap) -> Result when
      Blocks :: beam_ssa:block_map(),
      Counter :: non_neg_integer(),
      AliasMap :: match_alias_map(),
      Result :: {Blocks, Counter}.

-type match_alias_map() ::
        #{ Binary :: #b_var{} =>
                     { %% Replace all uses of Binary with an alias after this
                       %% label.
                       AliasAfter :: beam_ssa:label(),
                       %% The match context whose tail is equal to Binary.
                       Context :: #b_var{} } }.

%% Keeps track of the promotions we need to insert. They're partially keyed by
%% location because they may not be valid on all execution paths and we may
%% need to add redundant promotions in some cases.
-type promotion_map() ::
        #{ { PromoteAt :: beam_ssa:label(),
             Variable :: #b_var{} } =>
               Instruction :: #b_set{} }.

-record(amb, { dominators :: beam_ssa:dominator_map(),
               match_aliases :: match_alias_map(),
               cnt :: non_neg_integer(),
               promotions = #{} :: promotion_map() }).

alias_matched_binaries(Blocks0, Counter, AliasMap) when AliasMap =/= #{} ->
    {Dominators, _} = beam_ssa:dominators(Blocks0),
    State0 = #amb{ dominators = Dominators,
                   match_aliases = AliasMap,
                   cnt = Counter },
    {Blocks, State} = beam_ssa:mapfold_blocks_rpo(fun amb_1/3, [0], State0,
                                                  Blocks0),
    {amb_insert_promotions(Blocks, State), State#amb.cnt};
alias_matched_binaries(Blocks, Counter, _AliasMap) ->
    {Blocks, Counter}.

amb_1(Lbl, #b_blk{is=Is0,last=Last0}=Block, State0) ->
    {Is, State1} = mapfoldl(fun(I, State) ->
                                    amb_assign_set(I, Lbl, State)
                            end, State0, Is0),
    {Last, State} = amb_assign_last(Last0, Lbl, State1),
    {Block#b_blk{is=Is,last=Last}, State}.

amb_assign_set(#b_set{op=phi,args=Args0}=I, _Lbl, State0) ->
    %% Phi node aliases are relative to their source block, not their
    %% containing block.
    {Args, State} =
        mapfoldl(fun({Arg0, Lbl}, Acc) ->
                         {Arg, State} = amb_get_alias(Arg0, Lbl, Acc),
                         {{Arg, Lbl}, State}
                 end, State0, Args0),
    {I#b_set{args=Args}, State};
amb_assign_set(#b_set{args=Args0}=I, Lbl, State0) ->
    {Args, State} = mapfoldl(fun(Arg0, Acc) ->
                                     amb_get_alias(Arg0, Lbl, Acc)
                             end, State0, Args0),
    {I#b_set{args=Args}, State}.

amb_assign_last(#b_ret{arg=Arg0}=T, Lbl, State0) ->
    {Arg, State} = amb_get_alias(Arg0, Lbl, State0),
    {T#b_ret{arg=Arg}, State};
amb_assign_last(#b_switch{arg=Arg0}=T, Lbl, State0) ->
    {Arg, State} = amb_get_alias(Arg0, Lbl, State0),
    {T#b_switch{arg=Arg}, State};
amb_assign_last(#b_br{bool=Arg0}=T, Lbl, State0) ->
    {Arg, State} = amb_get_alias(Arg0, Lbl, State0),
    {T#b_br{bool=Arg}, State}.

amb_get_alias(#b_var{}=Arg, Lbl, State) ->
    case maps:find(Arg, State#amb.match_aliases) of
        {ok, {AliasAfter, Context}} ->
            %% Our context may not have been created yet, so we skip assigning
            %% an alias unless the given block is among our dominators.
            Dominators = maps:get(Lbl, State#amb.dominators),
            case member(AliasAfter, Dominators) of
                true -> amb_create_alias(Arg, Context, Lbl, State);
                false -> {Arg, State}
            end;
        error ->
            {Arg, State}
    end;
amb_get_alias(#b_remote{mod=Mod0,name=Name0}=Arg0, Lbl, State0) ->
    {Mod, State1} = amb_get_alias(Mod0, Lbl, State0),
    {Name, State} = amb_get_alias(Name0, Lbl, State1),
    Arg = Arg0#b_remote{mod=Mod,name=Name},
    {Arg, State};
amb_get_alias(Arg, _Lbl, State) ->
    {Arg, State}.

amb_create_alias(#b_var{}=Arg0, Context, Lbl, State0) ->
    Dominators = maps:get(Lbl, State0#amb.dominators),
    Promotions0 = State0#amb.promotions,

    PrevPromotions =
        [maps:get({Dom, Arg0}, Promotions0)
         || Dom <- Dominators, is_map_key({Dom, Arg0}, Promotions0)],

    case PrevPromotions of
        [_|_] ->
            %% We've already created an alias prior to this block, so we'll
            %% grab the most recent one to minimize stack use.

            #b_set{dst=Alias} = max(PrevPromotions),
            {Alias, State0};
        [] ->
            %% If we haven't created an alias we need to do so now. The
            %% promotion will be inserted later by amb_insert_promotions/2.

            Counter = State0#amb.cnt,
            Alias = #b_var{name={'@ssa_bsm_alias', Counter}},
            Promotion = #b_set{op=bs_get_tail,dst=Alias,args=[Context]},

            Promotions = maps:put({Lbl, Arg0}, Promotion, Promotions0),
            State = State0#amb{ promotions=Promotions, cnt=Counter+1 },

            {Alias, State}
    end.

amb_insert_promotions(Blocks0, State) ->
    F = fun({Lbl, #b_var{}}, Promotion, Blocks) ->
                Block = maps:get(Lbl, Blocks),

                Alias = Promotion#b_set.dst,
                {Before, After} = splitwith(
                                    fun(#b_set{args=Args}) ->
                                            not is_var_in_args(Alias, Args)
                                    end, Block#b_blk.is),
                Is = Before ++ [Promotion | After],

                maps:put(Lbl, Block#b_blk{is=Is}, Blocks)
        end,
    maps:fold(F, Blocks0, State#amb.promotions).

is_var_in_args(Var, [Var | _]) -> true;
is_var_in_args(Var, [#b_remote{name=Var} | _]) -> true;
is_var_in_args(Var, [#b_remote{mod=Var} | _]) -> true;
is_var_in_args(Var, [_ | Args]) -> is_var_in_args(Var, Args);
is_var_in_args(_Var, []) -> false.

%%%
%%% Subpasses
%%%

%% Removes superflous chained bs_start_match instructions in the same
%% function. When matching on an extracted tail binary, or on a binary we've
%% already matched on, we reuse the original match context.
%%
%% This pass runs first since it makes subsequent optimizations more effective
%% by removing spots where promotion would be required.

-type prior_match_map() ::
        #{ Binary :: #b_var{} =>
                     [{ %% The context and success label of a previous
                        %% bs_start_match made on this binary.
                        ValidAfter :: beam_ssa:label(),
                        Context :: #b_var{} }] }.

-record(cm, { definitions :: beam_ssa:definition_map(),
              dominators :: beam_ssa:dominator_map(),
              blocks :: beam_ssa:block_map(),
              match_aliases = #{} :: match_alias_map(),
              prior_matches = #{} :: prior_match_map(),
              renames = #{} :: beam_ssa:rename_map() }).

combine_matches({Fs0, ModInfo}) ->
    Fs = map(fun(F) -> combine_matches(F, ModInfo) end, Fs0),
    {Fs, ModInfo}.

combine_matches(#b_function{bs=Blocks0,cnt=Counter0}=F, ModInfo) ->
    case funcinfo_get(F, has_bsm_ops, ModInfo) of
        true ->
            {Dominators, _} = beam_ssa:dominators(Blocks0),
            {Blocks1, State} =
                beam_ssa:mapfold_blocks_rpo(
                  fun(Lbl, #b_blk{is=Is0}=Block0, State0) ->
                          {Is, State} = cm_1(Is0, [], Lbl, State0),
                          {Block0#b_blk{is=Is}, State}
                  end, [0],
                  #cm{ definitions = beam_ssa:definitions(Blocks0),
                       dominators = Dominators,
                       blocks = Blocks0 },
                  Blocks0),

            Blocks2 = beam_ssa:rename_vars(State#cm.renames, [0], Blocks1),

            {Blocks, Counter} = alias_matched_binaries(Blocks2, Counter0,
                                                       State#cm.match_aliases),

            F#b_function{ bs=Blocks, cnt=Counter };
        false ->
            F
    end.

cm_1([#b_set{ op=bs_start_match,
              dst=Ctx,
              args=[Src] },
      #b_set{ op=succeeded,
              dst=Bool,
              args=[Ctx] }]=MatchSeq, Acc0, Lbl, State0) ->
    Acc = reverse(Acc0),
    case is_tail_binary(Src, State0#cm.definitions) of
        true -> cm_combine_tail(Src, Ctx, Bool, Acc, State0);
        false -> cm_handle_priors(Src, Ctx, Bool, Acc, MatchSeq, Lbl, State0)
    end;
cm_1([I | Is], Acc, Lbl, State) ->
    cm_1(Is, [I | Acc], Lbl, State);
cm_1([], Acc, _Lbl, State) ->
    {reverse(Acc), State}.

%% If we're dominated by at least one match on the same source, we can reuse
%% the context created by that match.
cm_handle_priors(Src, DstCtx, Bool, Acc, MatchSeq, Lbl, State0) ->
    PriorCtxs = case maps:find(Src, State0#cm.prior_matches) of
                    {ok, Priors} ->
                        %% We've seen other match contexts on this source, but
                        %% we can only consider the ones whose success path
                        %% dominate us.
                        Dominators = maps:get(Lbl, State0#cm.dominators, []),
                        [Ctx || {ValidAfter, Ctx} <- Priors,
                                member(ValidAfter, Dominators)];
                    error ->
                        []
                end,
    case PriorCtxs of
        [Ctx|_] ->
            Renames0 = State0#cm.renames,
            Renames = Renames0#{ Bool => #b_literal{val=true}, DstCtx => Ctx },
            {Acc, State0#cm{ renames = Renames }};
        [] ->
            %% Since we lack a prior match, we need to register this one in
            %% case we dominate another.
            State = cm_register_prior(Src, DstCtx, Lbl, State0),
            {Acc ++ MatchSeq, State}
    end.

cm_register_prior(Src, DstCtx, Lbl, State) ->
    Block = maps:get(Lbl, State#cm.blocks),
    #b_br{succ=ValidAfter} = Block#b_blk.last,

    Priors0 = maps:get(Src, State#cm.prior_matches, []),
    Priors = [{ValidAfter, DstCtx} | Priors0],

    PriorMatches = maps:put(Src, Priors, State#cm.prior_matches),
    State#cm{ prior_matches = PriorMatches }.

cm_combine_tail(Src, DstCtx, Bool, Acc, State0) ->
    SrcCtx = match_context_of(Src, State0#cm.definitions),

    %% We replace the source with a context alias as it normally won't be used
    %% on the happy path after being matched, and the added cost of conversion
    %% is negligible if it is.
    Aliases = maps:put(Src, {0, SrcCtx}, State0#cm.match_aliases),

    Renames0 = State0#cm.renames,
    Renames = Renames0#{ Bool => #b_literal{val=true}, DstCtx => SrcCtx },

    State = State0#cm{ match_aliases = Aliases, renames = Renames },

    {Acc, State}.

%% Lets functions accept match contexts as arguments. The parameter must be
%% unused before the bs_start_match instruction, and it must be matched in the
%% first block.

-record(aca, { unused_parameters :: ordsets:ordset(#b_var{}),
               counter :: non_neg_integer(),
               parameter_info = #{} :: #{ #b_var{} => param_info() },
               match_aliases = #{} :: match_alias_map() }).

accept_context_args({Fs, ModInfo}) ->
    mapfoldl(fun accept_context_args/2, ModInfo, Fs).

accept_context_args(#b_function{bs=Blocks0}=F, ModInfo0) ->
    case funcinfo_get(F, has_bsm_ops, ModInfo0) of
        true ->
            Parameters = ordsets:from_list(funcinfo_get(F, parameters, ModInfo0)),
            State0 = #aca{ unused_parameters = Parameters,
                           counter = F#b_function.cnt },

            {Blocks1, State} = aca_1(Blocks0, State0),
            {Blocks, Counter} = alias_matched_binaries(Blocks1,
                                                       State#aca.counter,
                                                       State#aca.match_aliases),

            ModInfo = funcinfo_set(F, parameter_info, State#aca.parameter_info,
                                   ModInfo0),

            {F#b_function{bs=Blocks,cnt=Counter}, ModInfo};
        false ->
            {F, ModInfo0}
    end.

aca_1(Blocks, State) ->
    %% We only handle block 0 as we don't yet support starting a match after a
    %% test. This is generally good enough as the sys_core_bsm pass makes the
    %% match instruction come first if possible, and it's rare for a function
    %% to binary-match several parameters at once.
    EntryBlock = maps:get(0, Blocks),
    aca_enable_reuse(EntryBlock#b_blk.is, EntryBlock, Blocks, [], State).

aca_enable_reuse([#b_set{op=bs_start_match,args=[Src]}=I0 | Rest],
                 EntryBlock, Blocks0, Acc, State0) ->
    case aca_is_reuse_safe(Src, State0) of
        true ->
            {I, Last, Blocks1, State} =
                aca_reuse_context(I0, EntryBlock, Blocks0, State0),

            Is = reverse([I|Acc]) ++ Rest,
            Blocks = maps:put(0, EntryBlock#b_blk{is=Is,last=Last}, Blocks1),

            {Blocks, State};
        false ->
            {Blocks0, State0}
    end;
aca_enable_reuse([I | Is], EntryBlock, Blocks, Acc, State0) ->
    UnusedParams0 = State0#aca.unused_parameters,
    case ordsets:intersection(UnusedParams0, beam_ssa:used(I)) of
        [] ->
            aca_enable_reuse(Is, EntryBlock, Blocks, [I | Acc], State0);
        PrematureUses ->
            UnusedParams = ordsets:subtract(UnusedParams0, PrematureUses),

            %% Mark the offending parameters as unsuitable for context reuse.
            ParamInfo = foldl(fun(A, Ps) ->
                                      maps:put(A, {used_before_match, I}, Ps)
                              end, State0#aca.parameter_info, PrematureUses),

            State = State0#aca{ unused_parameters = UnusedParams,
                                parameter_info = ParamInfo },
            aca_enable_reuse(Is, EntryBlock, Blocks, [I | Acc], State)
    end;
aca_enable_reuse([], _EntryBlock, Blocks, _Acc, State) ->
    {Blocks, State}.

aca_is_reuse_safe(Src, State) ->
    %% Context reuse is unsafe unless all uses are dominated by the start_match
    %% instruction. Since we only process block 0 it's enough to check if
    %% they're unused so far.
    ordsets:is_element(Src, State#aca.unused_parameters).

aca_reuse_context(#b_set{dst=Dst, args=[Src]}=I0, Block, Blocks0, State0) ->
    %% When matching fails on a reused context it needs to be converted back
    %% to a binary. We only need to do this on the success path since it can't
    %% be a context on the type failure path, but it's very common for these
    %% to converge which requires special handling.
    {State1, Last, Blocks} =
        aca_handle_convergence(Src, State0, Block#b_blk.last, Blocks0),

    Aliases = maps:put(Src, {Last#b_br.succ, Dst}, State1#aca.match_aliases),
    ParamInfo = maps:put(Src, suitable_for_reuse, State1#aca.parameter_info),

    State = State1#aca{ match_aliases = Aliases,
                        parameter_info = ParamInfo },

    I = beam_ssa:add_anno(accepts_match_contexts, true, I0),

    {I, Last, Blocks, State}.

aca_handle_convergence(Src, State0, Last0, Blocks0) ->
    #b_br{fail=Fail0,succ=Succ0} = Last0,

    SuccPath = beam_ssa:rpo([Succ0], Blocks0),
    FailPath = beam_ssa:rpo([Fail0], Blocks0),

    %% The promotion logic in alias_matched_binaries breaks down if the source
    %% is used after the fail/success paths converge, as we have no way to tell
    %% whether the source is a match context or something else past that point.
    %%
    %% We could handle this through clever insertion of phi nodes but it's
    %% far simpler to copy either branch in its entirety. It doesn't matter
    %% which one as long as they become disjoint.
    ConvergedPaths = ordsets:intersection(
                       ordsets:from_list(SuccPath),
                       ordsets:from_list(FailPath)),

    case maps:is_key(Src, beam_ssa:uses(ConvergedPaths, Blocks0)) of
        true ->
            case shortest(SuccPath, FailPath) of
                left ->
                    {Succ, Blocks, Counter} =
                        aca_copy_successors(Succ0, Blocks0, State0#aca.counter),
                    State = State0#aca{ counter = Counter },
                    {State, Last0#b_br{succ=Succ}, Blocks};
                right ->
                    {Fail, Blocks, Counter} =
                        aca_copy_successors(Fail0, Blocks0, State0#aca.counter),
                    State = State0#aca{ counter = Counter },
                    {State, Last0#b_br{fail=Fail}, Blocks}
            end;
        false ->
            {State0, Last0, Blocks0}
    end.

shortest([_|As], [_|Bs]) -> shortest(As, Bs);
shortest([], _) -> left;
shortest(_, []) -> right.

%% Copies all successor blocks of Lbl, returning the label to the entry block
%% of this copy. Since the copied blocks aren't referenced anywhere else, they
%% are all guaranteed to be dominated by Lbl.
aca_copy_successors(Lbl0, Blocks0, Counter0) ->
    %% Building the block rename map up front greatly simplifies phi node
    %% handling.
    Path = beam_ssa:rpo([Lbl0], Blocks0),
    {BRs, Counter1} = aca_cs_build_brs(Path, Counter0, #{}),
    {Blocks, Counter} = aca_cs_1(Path, Blocks0, Counter1, #{}, BRs, #{}),
    Lbl = maps:get(Lbl0, BRs),
    {Lbl, Blocks, Counter}.

aca_cs_build_brs([?BADARG_BLOCK=Lbl | Path], Counter, Acc) ->
    %% ?BADARG_BLOCK is a marker and not an actual block, so renaming it will
    %% break exception handling.
    aca_cs_build_brs(Path, Counter, Acc#{ Lbl => Lbl });
aca_cs_build_brs([Lbl | Path], Counter0, Acc) ->
    aca_cs_build_brs(Path, Counter0 + 1, Acc#{ Lbl => Counter0 });
aca_cs_build_brs([], Counter, Acc) ->
    {Acc, Counter}.

aca_cs_1([Lbl0 | Path], Blocks, Counter0, VRs0, BRs, Acc0) ->
    Block0 = maps:get(Lbl0, Blocks),
    Lbl = maps:get(Lbl0, BRs),
    {VRs, Block, Counter} = aca_cs_block(Block0, Counter0, VRs0, BRs),
    Acc = maps:put(Lbl, Block, Acc0),
    aca_cs_1(Path, Blocks, Counter, VRs, BRs, Acc);
aca_cs_1([], Blocks, Counter, _VRs, _BRs, Acc) ->
    {maps:merge(Blocks, Acc), Counter}.

aca_cs_block(#b_blk{is=Is0,last=Last0}=Block0, Counter0, VRs0, BRs) ->
    {VRs, Is, Counter} = aca_cs_is(Is0, Counter0, VRs0, BRs, []),
    Last = aca_cs_last(Last0, VRs, BRs),
    Block = Block0#b_blk{is=Is,last=Last},
    {VRs, Block, Counter}.

aca_cs_is([#b_set{op=Op,
                  dst=Dst0,
                  args=Args0}=I0 | Is],
          Counter0, VRs0, BRs, Acc) ->
    Args = case Op of
               phi -> aca_cs_args_phi(Args0, VRs0, BRs);
               _ -> aca_cs_args(Args0, VRs0)
           end,
    Counter = Counter0 + 1,
    Dst = #b_var{name={'@ssa_bsm_aca',Counter}},
    I = I0#b_set{dst=Dst,args=Args},
    VRs = maps:put(Dst0, Dst, VRs0),
    aca_cs_is(Is, Counter, VRs, BRs, [I | Acc]);
aca_cs_is([], Counter, VRs, _BRs, Acc) ->
    {VRs, reverse(Acc), Counter}.

aca_cs_last(#b_switch{arg=Arg0,list=Switch0,fail=Fail0}=Sw, VRs, BRs) ->
    Switch = [{Literal, maps:get(Lbl, BRs)} || {Literal, Lbl} <- Switch0],
    Sw#b_switch{arg=aca_cs_arg(Arg0, VRs),
                fail=maps:get(Fail0, BRs),
                list=Switch};
aca_cs_last(#b_br{bool=Arg0,succ=Succ0,fail=Fail0}=Br, VRs, BRs) ->
    Br#b_br{bool=aca_cs_arg(Arg0, VRs),
            succ=maps:get(Succ0, BRs),
            fail=maps:get(Fail0, BRs)};
aca_cs_last(#b_ret{arg=Arg0}=Ret, VRs, _BRs) ->
    Ret#b_ret{arg=aca_cs_arg(Arg0, VRs)}.

aca_cs_args_phi([{Arg, Lbl} | Args], VRs, BRs) ->
    case BRs of
        #{ Lbl := New } ->
            [{aca_cs_arg(Arg, VRs), New} | aca_cs_args_phi(Args, VRs, BRs)];
        #{} ->
            aca_cs_args_phi(Args, VRs, BRs)
    end;
aca_cs_args_phi([], _VRs, _BRs) ->
    [].

aca_cs_args([Arg | Args], VRs) ->
    [aca_cs_arg(Arg, VRs) | aca_cs_args(Args, VRs)];
aca_cs_args([], _VRs) ->
    [].

aca_cs_arg(#b_remote{mod=Mod0,name=Name0}=Rem, VRs) ->
    Mod = aca_cs_arg(Mod0, VRs),
    Name = aca_cs_arg(Name0, VRs),
    Rem#b_remote{mod=Mod,name=Name};
aca_cs_arg(Arg, VRs) ->
    case VRs of
        #{ Arg := New } -> New;
        #{} -> Arg
    end.

%% Allows contexts to pass through "wrapper functions" where the context is
%% passed directly to a function that accepts match contexts (including other
%% wrappers).
%%
%% This does not alter the function in any way, it only changes parameter info
%% so that skip_outgoing_tail_extraction is aware that it's safe to pass
%% contexts to us.

allow_context_passthrough({Fs, ModInfo0}) ->
    ModInfo =
        acp_forward_params([{F, beam_ssa:uses(F#b_function.bs)} || F <- Fs],
                           ModInfo0),
    {Fs, ModInfo}.

acp_forward_params(FsUses, ModInfo0) ->
    F = fun({#b_function{args=Parameters}=Func, UseMap}, ModInfo) ->
                ParamInfo =
                    foldl(fun(Param, ParamInfo) ->
                                  Uses = maps:get(Param, UseMap, []),
                                  acp_1(Param, Uses, ModInfo, ParamInfo)
                          end,
                          funcinfo_get(Func, parameter_info, ModInfo),
                          Parameters),
                funcinfo_set(Func, parameter_info, ParamInfo, ModInfo)
        end,
    %% Allowing context passthrough on one function may make it possible to
    %% enable it on another, so it needs to be repeated for maximum effect.
    case foldl(F, ModInfo0, FsUses) of
        ModInfo0 -> ModInfo0;
        Changed -> acp_forward_params(FsUses, Changed)
    end.

%% We have no way to know if an argument is a context, so it's only safe to
%% forward them if they're passed exactly once in the first block. Any other
%% uses are unsafe, including function_clause errors.
acp_1(Param, [{0, #b_set{op=call}=I}], ModInfo, ParamInfo) ->
    %% We don't need to provide a context chain as our callers make sure that
    %% multiple arguments never reference the same context.
    case check_context_call(I, Param, [], ModInfo) of
        {no_match_on_entry, _} -> ParamInfo;
        Other -> maps:put(Param, Other, ParamInfo)
    end;
acp_1(_Param, _Uses, _ModInfo, ParamInfo) ->
    ParamInfo.

%% This is conceptually similar to combine_matches but operates across
%% functions. Whenever a tail binary is passed to a parameter that accepts
%% match contexts we'll pass the context instead, improving performance by
%% avoiding the creation of a new match context in the callee.
%%
%% We also create an alias to delay extraction until it's needed as an actual
%% binary, which is often rare on the happy path. The cost of being wrong is
%% negligible (`bs_test_unit + bs_get_tail` vs `bs_get_binary`) so we're
%% applying it unconditionally to keep things simple.

-record(sote, { definitions :: beam_ssa:definition_map(),
                mod_info :: module_info(),
                match_aliases = #{} :: match_alias_map() }).

skip_outgoing_tail_extraction({Fs0, ModInfo}) ->
    Fs = map(fun(F) -> skip_outgoing_tail_extraction(F, ModInfo) end, Fs0),
    {Fs, ModInfo}.

skip_outgoing_tail_extraction(#b_function{bs=Blocks0}=F, ModInfo) ->
    case funcinfo_get(F, has_bsm_ops, ModInfo) of
        true ->
            State0 = #sote{ definitions = beam_ssa:definitions(Blocks0),
                            mod_info = ModInfo },

            {Blocks1, State} = beam_ssa:mapfold_instrs_rpo(
                                 fun sote_rewrite_calls/2, [0], State0, Blocks0),

            {Blocks, Counter} = alias_matched_binaries(Blocks1,
                                                       F#b_function.cnt,
                                                       State#sote.match_aliases),

            F#b_function{bs=Blocks,cnt=Counter};
        false ->
            F
    end.

sote_rewrite_calls(#b_set{op=call,args=Args}=Call, State) ->
    sote_rewrite_call(Call, Args, [], State);
sote_rewrite_calls(I, State) ->
    {I, State}.

sote_rewrite_call(Call, [], ArgsOut, State) ->
    {Call#b_set{args=reverse(ArgsOut)}, State};
sote_rewrite_call(Call0, [Arg | ArgsIn], ArgsOut, State0) ->
    case is_tail_binary(Arg, State0#sote.definitions) of
        true ->
            CtxChain = context_chain_of(Arg, State0#sote.definitions),
            case check_context_call(Call0, Arg, CtxChain, State0#sote.mod_info) of
                suitable_for_reuse ->
                    Ctx = match_context_of(Arg, State0#sote.definitions),

                    MatchAliases0 = State0#sote.match_aliases,
                    MatchAliases = maps:put(Arg, {0, Ctx}, MatchAliases0),
                    State = State0#sote{ match_aliases = MatchAliases },

                    Call = beam_ssa:add_anno(bsm_info, context_reused, Call0),
                    sote_rewrite_call(Call, ArgsIn, [Ctx | ArgsOut], State);
                Other ->
                    Call = beam_ssa:add_anno(bsm_info, Other, Call0),
                    sote_rewrite_call(Call, ArgsIn, [Arg | ArgsOut], State0)
            end;
        false ->
            sote_rewrite_call(Call0, ArgsIn, [Arg | ArgsOut], State0)
    end.

%% Adds parameter_type_info annotations to help the validator determine whether
%% our optimizations were safe.

annotate_context_parameters({Fs, ModInfo}) ->
    mapfoldl(fun annotate_context_parameters/2, ModInfo, Fs).

annotate_context_parameters(F, ModInfo) ->
    ParamInfo = funcinfo_get(F, parameter_info, ModInfo),
    TypeAnno0 = beam_ssa:get_anno(parameter_type_info, F, #{}),
    TypeAnno = maps:fold(fun(K, _V, Acc) when is_map_key(K, Acc) ->
                                 %% Assertion.
                                 error(conflicting_parameter_types);
                            (K, suitable_for_reuse, Acc) ->
                                 T = beam_validator:type_anno(match_context),
                                 Acc#{ K => T };
                            (_K, _V, Acc) ->
                                 Acc
                         end, TypeAnno0, ParamInfo),
    {beam_ssa:add_anno(parameter_type_info, TypeAnno, F), ModInfo}.

%%%
%%% +bin_opt_info
%%%

collect_opt_info(Fs) ->
    foldl(fun(#b_function{bs=Blocks}=F, Acc0) ->
                  UseMap = beam_ssa:uses(Blocks),
                  Where = beam_ssa:get_anno(location, F, []),
                  beam_ssa:fold_instrs_rpo(
                    fun(I, Acc) ->
                            collect_opt_info_1(I, Where, UseMap, Acc)
                    end, [0], Acc0, Blocks)
          end, [], Fs).

collect_opt_info_1(#b_set{op=Op,anno=Anno,dst=Dst}=I, Where, UseMap, Acc0) ->
    case is_tail_binary(I) of
        true when Op =:= bs_match ->
            %% The uses include when the context is passed raw, so we discard
            %% everything but the bs_extract instruction to limit warnings to
            %% unoptimized uses.
            Uses0 = maps:get(Dst, UseMap, []),
            case [E || {_, #b_set{op=bs_extract}=E} <- Uses0] of
                [Use] -> add_unopt_binary_info(Use, false, Where, UseMap, Acc0);
                [] -> Acc0
            end;
        true ->
            %% Add a warning for each use. Note that we don't do anything
            %% special if unused as a later pass will remove this instruction
            %% anyway.
            Uses = maps:get(Dst, UseMap, []),
            foldl(fun({_Lbl, Use}, Acc) ->
                          add_unopt_binary_info(Use, false, Where, UseMap, Acc)
                  end, Acc0, Uses);
        false ->
            add_opt_info(Anno, Where, Acc0)
    end;
collect_opt_info_1(#b_ret{anno=Anno}, Where, _UseMap, Acc) ->
    add_opt_info(Anno, Where, Acc);
collect_opt_info_1(_I, _Where, _Uses, Acc) ->
    Acc.

add_opt_info(Anno, Where, Acc) ->
    case maps:find(bsm_info, Anno) of
        {ok, Term} -> [make_warning(Term, Anno, Where) | Acc];
        error -> Acc
    end.

%% When an alias is promoted we need to figure out where it goes to ignore
%% warnings for compiler-generated things, and provide more useful warnings in
%% general.
%%
%% We track whether the binary has been used to build another term because it
%% can be helpful when there's no line information.

add_unopt_binary_info(#b_set{op=Follow,dst=Dst}, _Nested, Where, UseMap, Acc0)
  when Follow =:= put_tuple;
       Follow =:= put_list;
       Follow =:= put_map ->
    %% Term-building instructions.
    {_, Uses} = unzip(maps:get(Dst, UseMap, [])),
    foldl(fun(Use, Acc) ->
                  add_unopt_binary_info(Use, true, Where, UseMap, Acc)
          end, Acc0, Uses);
add_unopt_binary_info(#b_set{op=Follow,dst=Dst}, Nested, Where, UseMap, Acc0)
  when Follow =:= bs_extract;
       Follow =:= phi ->
    %% Non-building instructions that need to be followed.
    {_, Uses} = unzip(maps:get(Dst, UseMap, [])),
    foldl(fun(Use, Acc) ->
                  add_unopt_binary_info(Use, Nested, Where, UseMap, Acc)
          end, Acc0, Uses);
add_unopt_binary_info(#b_set{op=call,
                             args=[#b_remote{mod=#b_literal{val=erlang},
                                             name=#b_literal{val=error}} |
                                   _Ignored]},
                      _Nested, _Where, _UseMap, Acc) ->
    %% There's no nice way to tell compiler-generated exceptions apart from
    %% user ones so we ignore them all. I doubt anyone cares.
    Acc;
add_unopt_binary_info(#b_switch{anno=Anno}=I, Nested, Where, _UseMap, Acc) ->
    [make_promotion_warning(I, Nested, Anno, Where) | Acc];
add_unopt_binary_info(#b_set{anno=Anno}=I, Nested, Where, _UseMap, Acc) ->
    [make_promotion_warning(I, Nested, Anno, Where) | Acc];
add_unopt_binary_info(#b_ret{anno=Anno}=I, Nested, Where, _UseMap, Acc) ->
    [make_promotion_warning(I, Nested, Anno, Where) | Acc];
add_unopt_binary_info(#b_br{anno=Anno}=I, Nested, Where, _UseMap, Acc) ->
    [make_promotion_warning(I, Nested, Anno, Where) | Acc].

make_promotion_warning(I, Nested, Anno, Where) ->
    make_warning({binary_created, I, Nested}, Anno, Where).

make_warning(Term, Anno, Where) ->
    {File, Line} = maps:get(location, Anno, Where),
    {File,[{Line,?MODULE,Term}]}.

format_opt_info(context_reused) ->
    "OPTIMIZED: match context reused";
format_opt_info({binary_created, _, _}=Promotion) ->
    io_lib:format("BINARY CREATED: ~s", [format_opt_info_1(Promotion)]);
format_opt_info(Other) ->
    io_lib:format("NOT OPTIMIZED: ~s", [format_opt_info_1(Other)]).

format_opt_info_1({binary_created, #b_set{op=call,args=[Call|_]}, false}) ->
    io_lib:format("binary is used in call to ~s which doesn't support "
                  "context reuse", [format_call(Call)]);
format_opt_info_1({binary_created, #b_set{op=call,args=[Call|_]}, true}) ->
    io_lib:format("binary is used in term passed to ~s",
                  [format_call(Call)]);
format_opt_info_1({binary_created, #b_set{op={bif, BIF},args=Args}, false}) ->
    io_lib:format("binary is used in ~p/~p which doesn't support context "
                  "reuse", [BIF, length(Args)]);
format_opt_info_1({binary_created, #b_set{op={bif, BIF},args=Args}, true}) ->
    io_lib:format("binary is used in term passed to ~p/~p",
                  [BIF, length(Args)]);
format_opt_info_1({binary_created, #b_set{op=Op}, false}) ->
    io_lib:format("binary is used in '~p' which doesn't support context "
                  "reuse", [Op]);
format_opt_info_1({binary_created, #b_set{op=Op}, true}) ->
    io_lib:format("binary is used in term passed to '~p'", [Op]);
format_opt_info_1({binary_created, #b_ret{}, false}) ->
    io_lib:format("binary is returned from the function", []);
format_opt_info_1({binary_created, #b_ret{}, true}) ->
    io_lib:format("binary is used in a term that is returned from the "
                  "function", []);
format_opt_info_1({unsuitable_call, {Call, Inner}}) ->
    io_lib:format("binary used in call to ~s, where ~s",
                  [format_call(Call), format_opt_info_1(Inner)]);
format_opt_info_1({remote_call, Call}) ->
    io_lib:format("binary is used in remote call to ~s", [format_call(Call)]);
format_opt_info_1({fun_call, Call}) ->
    io_lib:format("binary is used in fun call (~s)",
                  [format_call(Call)]);
format_opt_info_1({multiple_uses_in_call, Call}) ->
    io_lib:format("binary is passed as multiple arguments to ~s",
                  [format_call(Call)]);
format_opt_info_1({no_match_on_entry, Call}) ->
    io_lib:format("binary is used in call to ~s which does not begin with a "
                  "suitable binary match", [format_call(Call)]);
format_opt_info_1({used_before_match, #b_set{op=call,args=[Call|_]}}) ->
    io_lib:format("binary is used in call to ~s before being matched",
                  [format_call(Call)]);
format_opt_info_1({used_before_match, #b_set{op={bif, BIF},args=Args}}) ->
    io_lib:format("binary is used in ~p/~p before being matched",
                  [BIF, length(Args)]);
format_opt_info_1({used_before_match, #b_set{op=phi}}) ->
    io_lib:format("binary is returned from an expression before being "
                  "matched", []);
format_opt_info_1({used_before_match, #b_set{op=Op}}) ->
    io_lib:format("binary is used in '~p' before being matched",[Op]);
format_opt_info_1(Term) ->
    io_lib:format("~w", [Term]).

format_call(#b_local{name=#b_literal{val=F},arity=A}) ->
    io_lib:format("~p/~p", [F, A]);
format_call(#b_remote{mod=#b_literal{val=M},name=#b_literal{val=F},arity=A}) ->
    io_lib:format("~p:~p/~p", [M, F, A]);
format_call(Fun) ->
    io_lib:format("~p", [Fun]).