diff options
Diffstat (limited to 'lib/compiler/src/beam_ssa_codegen.erl')
-rw-r--r-- | lib/compiler/src/beam_ssa_codegen.erl | 2077 |
1 files changed, 2077 insertions, 0 deletions
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl new file mode 100644 index 0000000000..c2d5035b19 --- /dev/null +++ b/lib/compiler/src/beam_ssa_codegen.erl @@ -0,0 +1,2077 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Purpose: Generate BEAM assembly code from the SSA format. + +-module(beam_ssa_codegen). + +-export([module/2]). +-export([classify_heap_need/2]). %Called from beam_ssa_pre_codegen. + +-export_type([ssa_register/0]). + +-include("beam_ssa.hrl"). + +-import(lists, [foldl/3,keymember/3,keysort/2,last/1,map/2,mapfoldl/3, + reverse/1,reverse/2,sort/1,splitwith/2,takewhile/2]). + +-record(cg, {lcount=1 :: beam_label(), %Label counter + functable=#{} :: #{fa()=>beam_label()}, + labels=#{} :: #{ssa_label()=>0|beam_label()}, + used_labels=gb_sets:empty() :: gb_sets:set(ssa_label()), + regs=#{} :: #{beam_ssa:var_name()=>ssa_register()}, + ultimate_fail=1 :: beam_label(), + catches=gb_sets:empty() :: gb_sets:set(ssa_label()) + }). + +-spec module(beam_ssa:b_module(), [compile:option()]) -> + {'ok',beam_asm:module_code()}. + +module(#b_module{name=Mod,exports=Es,attributes=Attrs,body=Fs}, _Opts) -> + {Asm,St} = functions(Fs, {atom,Mod}), + {ok,{Mod,Es,Attrs,Asm,St#cg.lcount}}. + +-record(need, {h=0 :: non_neg_integer(), + f=0 :: non_neg_integer()}). + +-record(cg_blk, {anno=#{} :: anno(), + is=[] :: [instruction()], + last :: terminator()}). + +-record(cg_set, {anno=#{} :: anno(), + dst :: b_var(), + op :: beam_ssa:op(), + args :: [beam_ssa:argument() | xreg()]}). + +-record(cg_alloc, {anno=#{} :: anno(), + stack=none :: 'none' | pos_integer(), + words=#need{} :: #need{}, + live :: 'undefined' | pos_integer(), + def_yregs=[] :: [yreg()] + }). + +-record(cg_br, {bool :: beam_ssa:value(), + succ :: ssa_label(), + fail :: ssa_label() + }). +-record(cg_ret, {arg :: cg_value(), + dealloc=none :: 'none' | pos_integer() + }). +-record(cg_switch, {arg :: cg_value(), + fail :: ssa_label(), + list :: [sw_list_item()] + }). + +-type fa() :: {beam_asm:function_name(),arity()}. +-type ssa_label() :: beam_ssa:label(). +-type beam_label() :: beam_asm:label(). + +-type anno() :: beam_ssa:anno(). + +-type b_var() :: beam_ssa:b_var(). +-type b_literal() :: beam_ssa:b_literal(). + +-type cg_value() :: beam_ssa:value() | xreg(). + +-type cg_set() :: #cg_set{}. +-type cg_alloc() :: #cg_alloc{}. + +-type instruction() :: cg_set() | cg_alloc(). + +-type cg_br() :: #cg_br{}. +-type cg_ret() :: #cg_ret{}. +-type cg_switch() :: #cg_switch{}. +-type terminator() :: cg_br() | cg_ret() | cg_switch(). + +-type sw_list_item() :: {b_literal(),ssa_label()}. + +-type reg_num() :: beam_asm:reg_num(). +-type xreg() :: {'x',reg_num()}. +-type yreg() :: {'y',reg_num()}. + +-type ssa_register() :: xreg() | yreg() | {'fr',reg_num()} | {'z',reg_num()}. + +functions(Forms, AtomMod) -> + mapfoldl(fun (F, St) -> function(F, AtomMod, St) end, + #cg{lcount=1}, Forms). + +function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) -> + #{func_info:={_,Name,Arity}} = Anno, + try + assert_badarg_block(Blocks), %Assertion. + Regs = maps:get(registers, Anno), + St1 = St0#cg{labels=#{},used_labels=gb_sets:empty(), + regs=Regs}, + {Fi,St2} = new_label(St1), %FuncInfo label + {Entry,St3} = local_func_label(Name, Arity, St2), + {Ult,St4} = new_label(St3), %Ultimate failure + Labels = (St4#cg.labels)#{0=>Entry,?BADARG_BLOCK=>0}, + St5 = St4#cg{labels=Labels,used_labels=gb_sets:singleton(Entry), + ultimate_fail=Ult}, + {Body,St} = cg_fun(Blocks, St5), + Asm = [{label,Fi},line(Anno), + {func_info,AtomMod,{atom,Name},Arity}] ++ + add_parameter_annos(Body, Anno) ++ + [{label,Ult},if_end], + Func = {function,Name,Arity,Entry,Asm}, + {Func,St} + catch + Class:Error:Stack -> + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +assert_badarg_block(Blocks) -> + %% Assertion: ?BADARG_BLOCK must be the call erlang:error(badarg). + case Blocks of + #{?BADARG_BLOCK:=Blk} -> + #b_blk{is=[#b_set{op=call,dst=Ret, + args=[#b_remote{mod=#b_literal{val=erlang}, + name=#b_literal{val=error}}, + #b_literal{val=badarg}]}], + last=#b_ret{arg=Ret}} = Blk, + ok; + #{} -> + %% ?BADARG_BLOCK has been removed because it was never used. + ok + end. + +add_parameter_annos([{label, _}=Entry | Body], Anno) -> + ParamInfo = maps:get(parameter_type_info, Anno, #{}), + Annos = maps:fold( + fun(K, V, Acc) when is_map_key(K, ParamInfo) -> + TypeInfo = maps:get(K, ParamInfo), + [{'%', {type_info, V, TypeInfo}} | Acc]; + (_K, _V, Acc) -> + Acc + end, [], maps:get(registers, Anno)), + [Entry | sort(Annos)] ++ Body. + +cg_fun(Blocks, St0) -> + Linear0 = linearize(Blocks), + St = collect_catch_labels(Linear0, St0), + Linear1 = need_heap(Linear0), + Linear2 = prefer_xregs(Linear1, St), + Linear3 = liveness(Linear2, St), + Linear4 = defined(Linear3, St), + Linear = opt_allocate(Linear4, St), + cg_linear(Linear, St). + +%% collect_catch_labels(Linear, St) -> St. +%% Collect all catch labels (labels for blocks that begin +%% with 'landingpad' instructions) for later use. + +collect_catch_labels(Linear, St) -> + Labels = collect_catch_labels_1(Linear), + St#cg{catches=gb_sets:from_list(Labels)}. + +collect_catch_labels_1([{L,#cg_blk{is=[#cg_set{op=landingpad}|_]}}|Bs]) -> + [L|collect_catch_labels_1(Bs)]; +collect_catch_labels_1([_|Bs]) -> + collect_catch_labels_1(Bs); +collect_catch_labels_1([]) -> []. + +%% need_heap([{BlockLabel,Block]) -> [{BlockLabel,Block}]. +%% Insert need_heap instructions in the instruction list. Try to be smart and +%% collect them together as much as possible. + +need_heap(Bs0) -> + Bs1 = need_heap_allocs(Bs0, #{}), + {Bs,#need{h=0,f=0}} = need_heap_blks(reverse(Bs1), #need{}, []), + Bs. + +need_heap_allocs([{L,#cg_blk{is=Is0,last=Terminator}=Blk0}|Bs], Counts0) -> + Next = next_block(Bs), + Successors = successors(Terminator), + Counts = foldl(fun(S, Cnts) -> + case Cnts of + #{S:=C} -> Cnts#{S:=C+1}; + #{} when S =:= Next -> Cnts#{S=>1}; + #{} -> Cnts#{S=>42} + end + end, Counts0, Successors), + case Counts of + #{L:=1} -> + [{L,Blk0}|need_heap_allocs(Bs, Counts)]; + #{L:=_} -> + %% This block has multiple predecessors. Force an allocation + %% in this block so that the predecessors don't need to do + %% an allocation on behalf of this block. + Is = case need_heap_never(Is0) of + true -> Is0; + false -> [#cg_alloc{}|Is0] + end, + Blk = Blk0#cg_blk{is=Is}, + [{L,Blk}|need_heap_allocs(Bs, Counts)]; + #{} -> + [{L,Blk0}|need_heap_allocs(Bs, Counts)] + end; +need_heap_allocs([], _) -> []. + +need_heap_never([#cg_alloc{}|_]) -> true; +need_heap_never([#cg_set{op=recv_next}|_]) -> true; +need_heap_never([#cg_set{op=wait}|_]) -> true; +need_heap_never(_) -> false. + +need_heap_blks([{L,#cg_blk{is=Is0}=Blk0}|Bs], H0, Acc) -> + {Is1,H1} = need_heap_is(reverse(Is0), H0, []), + {Ns,H} = need_heap_terminator(Bs, L, H1), + Is = Ns ++ Is1, + Blk = Blk0#cg_blk{is=Is}, + need_heap_blks(Bs, H, [{L,Blk}|Acc]); +need_heap_blks([], H, Acc) -> + {Acc,H}. + +need_heap_is([#cg_alloc{words=Words}=Alloc0|Is], N, Acc) -> + Alloc = Alloc0#cg_alloc{words=add_heap_words(N, Words)}, + need_heap_is(Is, #need{}, [Alloc|Acc]); +need_heap_is([#cg_set{anno=Anno,op=bs_init}=I0|Is], N, Acc) -> + Alloc = case need_heap_need(N) of + [#cg_alloc{words=Need}] -> alloc(Need); + [] -> 0 + end, + I = I0#cg_set{anno=Anno#{alloc=>Alloc}}, + need_heap_is(Is, #need{}, [I|Acc]); +need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) -> + case classify_heap_need(Op, Args) of + {put,Words} -> + %% Pass through adding to needed heap. + need_heap_is(Is, add_heap_words(N, Words), [I|Acc]); + put_float -> + need_heap_is(Is, add_heap_float(N), [I|Acc]); + neutral -> + need_heap_is(Is, N, [I|Acc]); + gc -> + need_heap_is(Is, #need{}, [I]++need_heap_need(N)++Acc) + end; +need_heap_is([], N, Acc) -> + {Acc,N}. + +need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=L,fail=L}}}|_], L, N) -> + %% Fallthrough. + {[],N}; +need_heap_terminator([{_,#cg_blk{is=Is,last=#cg_br{succ=L}}}|_], L, N) -> + case need_heap_need(N) of + [] -> + {[],#need{}}; + [_|_]=Alloc -> + %% If the preceding instructions are a binary construction, + %% hoist the allocation and incorporate into the bs_init + %% instruction. + case reverse(Is) of + [#cg_set{op=succeeded},#cg_set{op=bs_init}|_] -> + {[],N}; + [#cg_set{op=bs_put}|_] -> + {[],N}; + _ -> + %% Not binary construction. Must emit an allocation + %% instruction in this block. + {Alloc,#need{}} + end + end; +need_heap_terminator([{_,#cg_blk{}}|_], _, N) -> + {need_heap_need(N),#need{}}; +need_heap_terminator([], _, H) -> + {need_heap_need(H),#need{}}. + +need_heap_need(#need{h=0,f=0}) -> []; +need_heap_need(#need{}=N) -> [#cg_alloc{words=N}]. + +add_heap_words(#need{h=H1,f=F1}, #need{h=H2,f=F2}) -> + #need{h=H1+H2,f=F1+F2}; +add_heap_words(#need{h=Heap}=N, Words) when is_integer(Words) -> + N#need{h=Heap+Words}. + +add_heap_float(#need{f=F}=N) -> + N#need{f=F+1}. + +%% classify_heap_need(Operation, Arguments) -> +%% gc | neutral | {put,Words} | put_float. +%% Classify the heap need for this instruction. The return +%% values have the following meaning. +%% +%% {put,Words} means that the instruction will use Words words to build +%% something on the heap. +%% +%% 'put_float' means that the instruction will build one floating point +%% number on the heap. +%% +%% 'gc' means that that the instruction can potentially do a GC or throw an +%% exception. That means that an allocation instruction for any building +%% must be placed after this instruction. +%% +%% 'neutral' means that the instruction does nothing to disturb the heap. + +-spec classify_heap_need(beam_ssa:op(), [beam_ssa:value()]) -> + 'gc' | 'neutral' | + {'put',non_neg_integer()} | 'put_float'. + +classify_heap_need(put_list, _) -> + {put,2}; +classify_heap_need(put_tuple_arity, [#b_literal{val=Words}]) -> + {put,Words+1}; +classify_heap_need(put_tuple, Elements) -> + {put,length(Elements)+1}; +classify_heap_need({bif,Name}, Args) -> + case is_gc_bif(Name, Args) of + false -> neutral; + true -> gc + end; +classify_heap_need({float,Op}, _Args) -> + case Op of + get -> put_float; + _ -> neutral + end; +classify_heap_need(Name, _Args) -> + classify_heap_need(Name). + +%% classify_heap_need(Operation) -> gc | neutral. +%% Return either 'gc' or 'neutral'. +%% +%% 'gc' means that that the instruction can potentially do a GC or throw an +%% exception. That means that an allocation instruction for any building +%% must be placed after this instruction. +%% +%% 'neutral' means that the instruction does nothing to disturb the heap. +%% +%% Note: Only handle operations in this function that are not handled +%% by classify_heap_need/2. + +classify_heap_need(bs_add) -> gc; +classify_heap_need(bs_get) -> gc; +classify_heap_need(bs_get_tail) -> gc; +classify_heap_need(bs_init) -> gc; +classify_heap_need(bs_init_writable) -> gc; +classify_heap_need(bs_match_string) -> gc; +classify_heap_need(bs_put) -> neutral; +classify_heap_need(bs_restore) -> neutral; +classify_heap_need(bs_save) -> neutral; +classify_heap_need(bs_get_position) -> gc; +classify_heap_need(bs_set_position) -> neutral; +classify_heap_need(bs_skip) -> gc; +classify_heap_need(bs_start_match) -> neutral; +classify_heap_need(bs_test_tail) -> neutral; +classify_heap_need(bs_utf16_size) -> neutral; +classify_heap_need(bs_utf8_size) -> neutral; +classify_heap_need(build_stacktrace) -> gc; +classify_heap_need(call) -> gc; +classify_heap_need(catch_end) -> gc; +classify_heap_need(copy) -> neutral; +classify_heap_need(extract) -> gc; +classify_heap_need(get_hd) -> neutral; +classify_heap_need(get_map_element) -> neutral; +classify_heap_need(get_tl) -> neutral; +classify_heap_need(get_tuple_element) -> neutral; +classify_heap_need(has_map_field) -> neutral; +classify_heap_need(is_nonempty_list) -> neutral; +classify_heap_need(is_tagged_tuple) -> neutral; +classify_heap_need(kill_try_tag) -> gc; +classify_heap_need(landingpad) -> gc; +classify_heap_need(make_fun) -> gc; +classify_heap_need(new_try_tag) -> gc; +classify_heap_need(peek_message) -> gc; +classify_heap_need(put_map) -> gc; +classify_heap_need(put_tuple_elements) -> neutral; +classify_heap_need(raw_raise) -> gc; +classify_heap_need(recv_next) -> gc; +classify_heap_need(remove_message) -> neutral; +classify_heap_need(resume) -> gc; +classify_heap_need(set_tuple_element) -> gc; +classify_heap_need(succeeded) -> neutral; +classify_heap_need(timeout) -> gc; +classify_heap_need(wait) -> gc; +classify_heap_need(wait_timeout) -> gc. + +%%% +%%% Because beam_ssa_pre_codegen has inserted 'copy' instructions to copy +%%% variables that must be saved on the stack, a value can for some time +%%% be in both an X register and a Y register. +%%% +%%% Here we will keep track of variables that have the same value and +%%% rewrite instructions to use the variable that refers to the X +%%% register instead of the Y register. That could improve performance, +%%% since the BEAM interpreter have more optimized instructions +%%% operating on X registers than on Y registers. +%%% +%%% 'call' and 'make_fun' are handled somewhat specially. If a value +%%% already is in the correct X register, the X register will always +%%% be used instead of the Y register. However, if there are one or more +%%% values in the wrong X registers, the X registers variables will be +%%% used only if that does not cause more 'move' instructions to be +%%% be emitted than if the Y register variables were used. +%%% +%%% Here are some examples. The first example shows how a 'move' from +%%% an Y register is eliminated: +%%% +%%% move x0 y1 +%%% move y1 x0 %%Will be eliminated. +%%% +%%% call f/1 +%%% +%%% Here is an example when x0 and x1 must be swapped to load the argument +%%% registers. Here the 'call' instruction will use the Y registers to +%%% avoid introducing an extra 'move' insruction: +%%% +%%% move x0 y0 +%%% move x1 y1 +%%% +%%% move y0 x1 +%%% move y1 x0 +%%% +%%% call f/2 +%%% +%%% Using the X register to load the argument registers would need +%%% an extra 'move' instruction like this: +%%% +%%% move x0 y0 +%%% move x1 y1 +%%% +%%% move x1 x2 +%%% move x0 x1 +%%% move x2 x0 +%%% +%%% call f/2 +%%% + +prefer_xregs(Linear, St) -> + prefer_xregs(Linear, St, #{0=>#{}}). + +prefer_xregs([{L,#cg_blk{is=Is0,last=Last0}=Blk0}|Bs], St, Map0) -> + Copies0 = maps:get(L, Map0), + {Is,Copies} = prefer_xregs_is(Is0, St, Copies0, []), + Last = prefer_xregs_terminator(Last0, Copies, St), + Blk = Blk0#cg_blk{is=Is,last=Last}, + Successors = successors(Last), + Map = prefer_xregs_successors(Successors, Copies, Map0), + [{L,Blk}|prefer_xregs(Bs, St, Map)]; +prefer_xregs([], _St, _Map) -> []. + +prefer_xregs_successors([L|Ls], Copies0, Map0) -> + case Map0 of + #{L:=Copies1} -> + Copies = merge_copies(Copies0, Copies1), + Map = Map0#{L:=Copies}, + prefer_xregs_successors(Ls, Copies0, Map); + #{} -> + Map = Map0#{L=>Copies0}, + prefer_xregs_successors(Ls, Copies0, Map) + end; +prefer_xregs_successors([], _, Map) -> Map. + +prefer_xregs_is([#cg_alloc{}=I|Is], St, Copies0, Acc) -> + Copies = case I of + #cg_alloc{stack=none,words=#need{h=0,f=0}} -> + Copies0; + #cg_alloc{} -> + #{} + end, + prefer_xregs_is(Is, St, Copies, [I|Acc]); +prefer_xregs_is([#cg_set{op=copy,dst=Dst,args=[Src]}=I|Is], St, Copies0, Acc) -> + Copies1 = prefer_xregs_prune(I, Copies0, St), + Copies = case beam_args([Src,Dst], St) of + [Same,Same] -> Copies1; + [_,_] -> Copies1#{Dst=>Src} + end, + prefer_xregs_is(Is, St, Copies, [I|Acc]); +prefer_xregs_is([#cg_set{op=call,dst=Dst}=I0|Is], St, Copies, Acc) -> + I = prefer_xregs_call(I0, Copies, St), + prefer_xregs_is(Is, St, #{Dst=>{x,0}}, [I|Acc]); +prefer_xregs_is([#cg_set{op=make_fun,dst=Dst}=I0|Is], St, Copies, Acc) -> + I = prefer_xregs_call(I0, Copies, St), + prefer_xregs_is(Is, St, #{Dst=>{x,0}}, [I|Acc]); +prefer_xregs_is([#cg_set{op=set_tuple_element}=I|Is], St, Copies, Acc) -> + %% FIXME: HiPE translates the following code segment incorrectly: + %% {call_ext,3,{extfunc,erlang,setelement,3}}. + %% {move,{x,0},{y,3}}. + %% {set_tuple_element,{y,1},{y,3},1}. + %% Therefore, skip the translation of the arguments for set_tuple_element. + prefer_xregs_is(Is, St, Copies, [I|Acc]); +prefer_xregs_is([#cg_set{args=Args0}=I0|Is], St, Copies0, Acc) -> + Args = [do_prefer_xreg(A, Copies0, St) || A <- Args0], + I = I0#cg_set{args=Args}, + Copies = prefer_xregs_prune(I, Copies0, St), + prefer_xregs_is(Is, St, Copies, [I|Acc]); +prefer_xregs_is([], _St, Copies, Acc) -> + {reverse(Acc),Copies}. + +prefer_xregs_terminator(#cg_br{bool=Arg0}=I, Copies, St) -> + Arg = do_prefer_xreg(Arg0, Copies, St), + I#cg_br{bool=Arg}; +prefer_xregs_terminator(#cg_ret{arg=Arg0}=I, Copies, St) -> + Arg = do_prefer_xreg(Arg0, Copies, St), + I#cg_ret{arg=Arg}; +prefer_xregs_terminator(#cg_switch{arg=Arg0}=I, Copies, St) -> + Arg = do_prefer_xreg(Arg0, Copies, St), + I#cg_switch{arg=Arg}. + +prefer_xregs_prune(#cg_set{anno=#{clobbers:=true}}, _, _) -> + #{}; +prefer_xregs_prune(#cg_set{dst=Dst}, Copies, St) -> + DstReg = beam_arg(Dst, St), + F = fun(_, Alias) -> + beam_arg(Alias, St) =/= DstReg + end, + maps:filter(F, Copies). + +%% prefer_xregs_call(Instruction, Copies, St) -> Instruction. +%% Given a 'call' or 'make_fun' instruction, minimize the number +%% of 'move' instructions to set up the argument registers. +%% Prefer using X registers over Y registers, unless that will +%% result in more 'move' instructions. + +prefer_xregs_call(#cg_set{args=[_]}=I, _Copies, _St) -> + I; +prefer_xregs_call(#cg_set{args=[F|Args0]}=I, Copies, St) -> + case Args0 of + [A0] -> + %% Only one argument. Always prefer the X register + %% if available. + A = do_prefer_xreg(A0, Copies, St), + I#cg_set{args=[F,A]}; + [_|_] -> + %% Two or more arguments. Try rewriting arguments in + %% two ways and see which way produces the least + %% number of 'move' instructions. + Args1 = prefer_xregs_call_1(Args0, Copies, 0, St), + Args2 = [do_prefer_xreg(A, Copies, St) || A <- Args0], + case {count_moves(Args1, St),count_moves(Args2, St)} of + {N1,N2} when N1 < N2 -> + %% There will be fewer 'move' instructions if + %% we keep using Y registers. + I#cg_set{args=[F|Args1]}; + {_,_} -> + %% Always use the values in X registers. + I#cg_set{args=[F|Args2]} + end + end. + +count_moves(Args, St) -> + length(setup_args(beam_args(Args, St))). + +prefer_xregs_call_1([#b_var{}=A|As], Copies, X, St) -> + case {beam_arg(A, St),Copies} of + {{y,_},#{A:=Other}} -> + case beam_arg(Other, St) of + {x,X} -> + %% This value is already in the correct X register. + %% It is always benefical to use the X register variable. + [Other|prefer_xregs_call_1(As, Copies, X+1, St)]; + _ -> + %% This value is another X register. Keep using + %% the Y register variable. + [A|prefer_xregs_call_1(As, Copies, X+1, St)] + end; + {_,_} -> + %% The value is not available in an X register. + [A|prefer_xregs_call_1(As, Copies, X+1, St)] + end; +prefer_xregs_call_1([A|As], Copies, X, St) -> + [A|prefer_xregs_call_1(As, Copies, X+1, St)]; +prefer_xregs_call_1([], _, _, _) -> []. + +do_prefer_xreg(#b_var{}=A, Copies, St) -> + case {beam_arg(A, St),Copies} of + {{y,_},#{A:=Copy}} -> + Copy; + {_,_} -> + A + end; +do_prefer_xreg(A, _, _) -> A. + +merge_copies(Copies0, Copies1) when map_size(Copies0) =< map_size(Copies1) -> + maps:filter(fun(K, V) -> + case Copies1 of + #{K:=V} -> true; + #{} -> false + end + end, Copies0); +merge_copies(Copies0, Copies1) -> + merge_copies(Copies1, Copies0). + + +%%% +%%% Add annotations for the number of live registers. +%%% + +liveness(Linear, #cg{regs=Regs}) -> + liveness(reverse(Linear), #{}, Regs, []). + +liveness([{L,#cg_blk{is=Is0,last=Last0}=Blk0}|Bs], LiveMap0, Regs, Acc) -> + Successors = liveness_successors(Last0), + Live0 = ordsets:union([liveness_get(S, LiveMap0) || S <- Successors]), + Live1 = liveness_terminator(Last0, Live0), + {Is,Live} = liveness_is(reverse(Is0), Regs, Live1, []), + LiveMap = LiveMap0#{L=>Live}, + Blk = Blk0#cg_blk{is=Is}, + liveness(Bs, LiveMap, Regs, [{L,Blk}|Acc]); +liveness([], _LiveMap, _Regs, Acc) -> Acc. + +liveness_get(S, LiveMap) -> + case LiveMap of + #{S:=Live} -> Live; + #{} -> [] + end. + +liveness_successors(Terminator) -> + successors(Terminator) -- [?BADARG_BLOCK]. + +liveness_is([#cg_alloc{}=I0|Is], Regs, Live, Acc) -> + I = I0#cg_alloc{live=num_live(Live, Regs)}, + liveness_is(Is, Regs, Live, [I|Acc]); +liveness_is([#cg_set{dst=Dst,args=Args}=I0|Is], Regs, Live0, Acc) -> + Live1 = liveness_clobber(I0, Live0, Regs), + I1 = liveness_yregs_anno(I0, Live1, Regs), + Live2 = liveness_args(Args, Live1), + Live = ordsets:del_element(Dst, Live2), + I = liveness_anno(I1, Live, Regs), + liveness_is(Is, Regs, Live, [I|Acc]); +liveness_is([], _, Live, Acc) -> + {Acc,Live}. + +liveness_terminator(#cg_br{bool=Arg}, Live) -> + liveness_terminator_1(Arg, Live); +liveness_terminator(#cg_switch{arg=Arg}, Live) -> + liveness_terminator_1(Arg, Live); +liveness_terminator(#cg_ret{arg=Arg}, Live) -> + liveness_terminator_1(Arg, Live). + +liveness_terminator_1(#b_var{}=V, Live) -> + ordsets:add_element(V, Live); +liveness_terminator_1(#b_literal{}, Live) -> + Live; +liveness_terminator_1(Reg, Live) -> + _ = verify_beam_register(Reg), + ordsets:add_element(Reg, Live). + +liveness_args([#b_var{}=V|As], Live) -> + liveness_args(As, ordsets:add_element(V, Live)); +liveness_args([#b_remote{mod=Mod,name=Name}|As], Live) -> + liveness_args([Mod,Name|As], Live); +liveness_args([A|As], Live) -> + case is_beam_register(A) of + true -> + liveness_args(As, ordsets:add_element(A, Live)); + false -> + liveness_args(As, Live) + end; +liveness_args([], Live) -> Live. + +liveness_anno(#cg_set{op=Op}=I, Live, Regs) -> + case need_live_anno(Op) of + true -> + NumLive = num_live(Live, Regs), + Anno = (I#cg_set.anno)#{live=>NumLive}, + I#cg_set{anno=Anno}; + false -> + I + end. + +liveness_yregs_anno(#cg_set{op=Op,dst=Dst}=I, Live0, Regs) -> + case need_live_anno(Op) of + true -> + Live = ordsets:del_element(Dst, Live0), + LiveYregs = [V || V <- Live, is_yreg(V, Regs)], + Anno = (I#cg_set.anno)#{live_yregs=>LiveYregs}, + I#cg_set{anno=Anno}; + false -> + I + end. + +liveness_clobber(#cg_set{anno=Anno}, Live, Regs) -> + case Anno of + #{clobbers:=true} -> + [R || R <- Live, is_yreg(R, Regs)]; + _ -> + Live + end. + +is_yreg(R, Regs) -> + case Regs of + #{R:={y,_}} -> true; + #{} -> false + end. + +num_live(Live, Regs) -> + Rs = ordsets:from_list([get_register(V, Regs) || V <- Live]), + num_live_1(Rs, 0). + +num_live_1([{x,X}|T], X) -> + num_live_1(T, X+1); +num_live_1([{x,_}|_]=T, X) -> + %% error({hole,{x,X},expected,Next}); + num_live_1(T, X+1); +num_live_1([{y,_}|_], X) -> + X; +num_live_1([{z,_}|_], X) -> + X; +num_live_1([{fr,_}|T], X) -> + num_live_1(T, X); +num_live_1([], X) -> + X. + +get_live(#cg_set{anno=#{live:=Live}}) -> + Live. + +%% need_live_anno(Operation) -> true|false. +%% Return 'true' if the instruction needs a 'live' annotation with +%% the number live X registers, or 'false' otherwise. + +need_live_anno(Op) -> + case Op of + {bif,_} -> true; + bs_get -> true; + bs_init -> true; + bs_get_position -> true; + bs_get_tail -> true; + bs_start_match -> true; + bs_skip -> true; + call -> true; + put_map -> true; + _ -> false + end. + +%%% +%%% Add the following annotations for Y registers: +%%% +%%% def_yregs An ordset with variables that refer to live Y registers. +%%% That is, Y registers that that have been killed +%%% are not included. This annotation is added to all +%%% instructions that require Y registers to be initialized. +%%% +%%% kill_yregs This annotation is added to call instructions. It is +%%% an ordset containing variables referring to Y registers +%%% that will no longer be used after the call instruction. +%%% + +defined(Linear, #cg{regs=Regs}) -> + def(Linear, #{}, Regs). + +def([{L,#cg_blk{is=Is0,last=Last}=Blk0}|Bs], DefMap0, Regs) -> + Def0 = def_get(L, DefMap0), + {Is,Def} = def_is(Is0, Regs, Def0, []), + Successors = successors(Last), + DefMap = def_successors(Successors, Def, DefMap0), + Blk = Blk0#cg_blk{is=Is}, + [{L,Blk}|def(Bs, DefMap, Regs)]; +def([], _, _) -> []. + +def_get(L, DefMap) -> + case DefMap of + #{L:=Def} -> Def; + #{} -> [] + end. + +def_is([#cg_alloc{anno=Anno0}=I0|Is], Regs, Def, Acc) -> + I = I0#cg_alloc{anno=Anno0#{def_yregs=>Def}}, + def_is(Is, Regs, Def, [I|Acc]); +def_is([#cg_set{op=kill_try_tag,args=[#b_var{}=Tag]}=I|Is], Regs, Def0, Acc) -> + Def = ordsets:del_element(Tag, Def0), + def_is(Is, Regs, Def, [I|Acc]); +def_is([#cg_set{op=catch_end,args=[#b_var{}=Tag|_]}=I|Is], Regs, Def0, Acc) -> + Def = ordsets:del_element(Tag, Def0), + def_is(Is, Regs, Def, [I|Acc]); +def_is([#cg_set{anno=Anno0,op=call,dst=Dst}=I0|Is], + Regs, Def0, Acc) -> + #{live_yregs:=LiveYregVars} = Anno0, + LiveRegs = gb_sets:from_list([maps:get(V, Regs) || V <- LiveYregVars]), + Kill0 = ordsets:subtract(Def0, LiveYregVars), + + %% Kill0 is the set of variables that have just died. However, the registers + %% used for killed variables may have been reused, so we must check that the + %% registers to be killed are not used by other variables. + Kill = [K || K <- Kill0, not gb_sets:is_element(maps:get(K, Regs), LiveRegs)], + Anno = Anno0#{def_yregs=>Def0,kill_yregs=>Kill}, + I = I0#cg_set{anno=Anno}, + Def1 = ordsets:subtract(Def0, Kill), + Def = def_add_yreg(Dst, Def1, Regs), + def_is(Is, Regs, Def, [I|Acc]); +def_is([#cg_set{anno=Anno0,op={bif,Bif},dst=Dst,args=Args}=I0|Is], + Regs, Def0, Acc) -> + Arity = length(Args), + I = case is_gc_bif(Bif, Args) orelse not erl_bifs:is_safe(erlang, Bif, Arity) of + true -> + I0#cg_set{anno=Anno0#{def_yregs=>Def0}}; + false -> + I0 + end, + Def = def_add_yreg(Dst, Def0, Regs), + def_is(Is, Regs, Def, [I|Acc]); +def_is([#cg_set{anno=Anno0,dst=Dst}=I0|Is], Regs, Def0, Acc) -> + I = case need_y_init(I0) of + true -> + I0#cg_set{anno=Anno0#{def_yregs=>Def0}}; + false -> + I0 + end, + Def = def_add_yreg(Dst, Def0, Regs), + def_is(Is, Regs, Def, [I|Acc]); +def_is([], _, Def, Acc) -> + {reverse(Acc),Def}. + +def_add_yreg(Dst, Def, Regs) -> + case is_yreg(Dst, Regs) of + true -> ordsets:add_element(Dst, Def); + false -> Def + end. + +def_successors([S|Ss], Def0, DefMap) -> + case DefMap of + #{S:=Def1} -> + Def = ordsets:intersection(Def0, Def1), + def_successors(Ss, Def0, DefMap#{S:=Def}); + #{} -> + def_successors(Ss, Def0, DefMap#{S=>Def0}) + end; +def_successors([], _, DefMap) -> DefMap. + +%% need_y_init(#cg_set{}) -> true|false. +%% Return true if this instructions needs initialized Y registers +%% (because the instruction may do a GC or cause an exception +%% so that the stack will be scanned), or false otherwise. + +need_y_init(#cg_set{anno=#{clobbers:=Clobbers}}) -> Clobbers; +need_y_init(#cg_set{op=bs_get}) -> true; +need_y_init(#cg_set{op=bs_get_position}) -> true; +need_y_init(#cg_set{op=bs_get_tail}) -> true; +need_y_init(#cg_set{op=bs_init}) -> true; +need_y_init(#cg_set{op=bs_skip,args=[#b_literal{val=Type}|_]}) -> + case Type of + utf8 -> true; + utf16 -> true; + utf32 -> true; + _ -> false + end; +need_y_init(#cg_set{op=bs_start_match}) -> true; +need_y_init(#cg_set{op=put_map}) -> true; +need_y_init(#cg_set{}) -> false. + +%% opt_allocate([{BlockLabel,Block}], #st{}) -> [BeamInstruction]. +%% Update the def_yregs field of each #cg_alloc{} that allocates +%% a stack frame. #cg_alloc.def_yregs will list all Y registers +%% that will be initialized by the subsequent code (thus, the +%% listed Y registers don't require init/1 instructions). + +opt_allocate(Linear, #cg{regs=Regs}) -> + opt_allocate_1(Linear, Regs). + +opt_allocate_1([{L,#cg_blk{is=[#cg_alloc{stack=Stk}=I0|Is]}=Blk0}|Bs]=Bs0, Regs) + when is_integer(Stk) -> + %% Collect the variables that are initialized by copy + %% instruction in this block. + case ordsets:from_list(opt_allocate_defs(Is, Regs)) of + Yregs when length(Yregs) =:= Stk -> + %% Those copy instructions are sufficient to fully + %% initialize the stack frame. + I = I0#cg_alloc{def_yregs=Yregs}, + [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)]; + Yregs0 -> + %% Determine a conservative approximation of the Y + %% registers that are guaranteed to be initialized by all + %% successors of this block, and to it add the variables + %% initialized by copy instructions in this block. + Yregs1 = opt_alloc_def(Bs0, gb_sets:singleton(L), []), + Yregs = ordsets:union(Yregs0, Yregs1), + I = I0#cg_alloc{def_yregs=Yregs}, + [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)] + end; +opt_allocate_1([B|Bs], Regs) -> + [B|opt_allocate_1(Bs, Regs)]; +opt_allocate_1([], _) -> []. + +opt_allocate_defs([#cg_set{op=copy,dst=Dst}|Is], Regs) -> + case is_yreg(Dst, Regs) of + true -> [Dst|opt_allocate_defs(Is, Regs)]; + false -> [] + end; +opt_allocate_defs(_, _Regs) -> []. + +opt_alloc_def([{L,#cg_blk{is=Is,last=Last}}|Bs], Ws0, Def0) -> + case gb_sets:is_member(L, Ws0) of + false -> + opt_alloc_def(Bs, Ws0, Def0); + true -> + case opt_allocate_is(Is) of + none -> + Succ = successors(Last), + Ws = gb_sets:union(Ws0, gb_sets:from_list(Succ)), + opt_alloc_def(Bs, Ws, Def0); + Def1 when is_list(Def1) -> + Def = [Def1|Def0], + opt_alloc_def(Bs, Ws0, Def) + end + end; +opt_alloc_def([], _, Def) -> + ordsets:intersection(Def). + +opt_allocate_is([#cg_set{anno=Anno}|Is]) -> + case Anno of + #{def_yregs:=Yregs} -> + Yregs; + #{} -> + opt_allocate_is(Is) + end; +opt_allocate_is([#cg_alloc{anno=#{def_yregs:=Yregs},stack=none}|_]) -> + Yregs; +opt_allocate_is([#cg_alloc{}|Is]) -> + opt_allocate_is(Is); +opt_allocate_is([]) -> none. + +%%% +%%% Here follows the main code generation functions. +%%% + +%% cg_linear([{BlockLabel,Block}]) -> [BeamInstruction]. +%% Generate BEAM instructions. + +cg_linear([{L,#cg_blk{anno=#{recv_set:=L}=Anno0}=B0}|Bs], St0) -> + Anno = maps:remove(recv_set, Anno0), + B = B0#cg_blk{anno=Anno}, + {Is,St1} = cg_linear([{L,B}|Bs], St0), + {Fail,St} = use_block_label(L, St1), + {[{recv_set,Fail}|Is],St}; +cg_linear([{L,#cg_blk{is=Is0,last=Last}}|Bs], St0) -> + Next = next_block(Bs), + St1 = new_block_label(L, St0), + {Is1,St2} = cg_block(Is0, Last, Next, St1), + {Is2,St} = cg_linear(Bs, St2), + {def_block_label(L, St)++Is1++Is2,St}; +cg_linear([], St) -> {[],St}. + +cg_block([#cg_set{op=recv_next}], #cg_br{succ=Lr0}, _Next, St0) -> + {Lr,St} = use_block_label(Lr0, St0), + {[{loop_rec_end,Lr}],St}; +cg_block([#cg_set{op=wait}], #cg_br{succ=Lr0}, _Next, St0) -> + {Lr,St} = use_block_label(Lr0, St0), + {[{wait,Lr}],St}; +cg_block(Is0, Last, Next, St0) -> + case Last of + #cg_br{succ=Next,fail=Next} -> + cg_block(Is0, none, St0); + #cg_br{succ=Same,fail=Same} -> + {Fail,St1} = use_block_label(Same, St0), + {Is,St} = cg_block(Is0, none, St1), + {Is++[jump(Fail)],St}; + #cg_br{bool=Bool,succ=Next,fail=Fail0} -> + {Fail,St1} = use_block_label(Fail0, St0), + {Is,St} = cg_block(Is0, {Bool,Fail}, St1), + {Is,St}; + #cg_br{bool=Bool,succ=Succ0,fail=Fail0} -> + {[Succ,Fail],St1} = use_block_labels([Succ0,Fail0], St0), + {Is,St} = cg_block(Is0, {Bool,Fail}, St1), + {Is++[jump(Succ)],St}; + #cg_ret{arg=Src0,dealloc=N} -> + Src = beam_arg(Src0, St0), + cg_block(Is0, {return,Src,N}, St0); + #cg_switch{} -> + cg_switch(Is0, Last, St0) + end. + +cg_switch(Is0, Last, St0) -> + #cg_switch{arg=Src0,fail=Fail0,list=List0} = Last, + Src = beam_arg(Src0, St0), + {Fail1,St1} = use_block_label(Fail0, St0), + Fail = ensure_label(Fail1, St1), + {List1,St2} = + flatmapfoldl(fun({V,L}, S0) -> + {Lbl,S} = use_block_label(L, S0), + {[beam_arg(V, S),Lbl],S} + end, St1, List0), + {Is1,St} = cg_block(Is0, none, St2), + case reverse(Is1) of + [{bif,tuple_size,_,[Tuple],{z,_}=Src}|More] -> + List = map(fun({integer,Arity}) -> Arity; + ({f,_}=F) -> F + end, List1), + Is = reverse(More, [{select_tuple_arity,Tuple,Fail,{list,List}}]), + {Is,St}; + _ -> + SelectVal = {select_val,Src,Fail,{list,List1}}, + {Is1 ++ [SelectVal],St} + end. + +jump({f,_}=Fail) -> + {jump,Fail}; +jump({catch_tag,Fail}) -> + {jump,Fail}. + +bif_fail({f,_}=Fail) -> Fail; +bif_fail({catch_tag,_}) -> {f,0}. + +next_block([]) -> none; +next_block([{Next,_}|_]) -> Next. + +ensure_label(Fail0, #cg{ultimate_fail=Lbl}) -> + case bif_fail(Fail0) of + {f,0} -> {f,Lbl}; + {f,_}=Fail -> Fail + end. + +cg_block([#cg_set{anno=#{recv_mark:=L}=Anno0}=I0|T], Context, St0) -> + Anno = maps:remove(recv_mark, Anno0), + I = I0#cg_set{anno=Anno}, + {Is,St1} = cg_block([I|T], Context, St0), + {Fail,St} = use_block_label(L, St1), + {[{recv_mark,Fail}|Is],St}; +cg_block([#cg_set{op=new_try_tag,dst=Tag,args=Args}], {Tag,Fail0}, St) -> + {catch_tag,Fail} = Fail0, + [Reg,{atom,Kind}] = beam_args([Tag|Args], St), + {[{Kind,Reg,Fail}],St}; +cg_block([#cg_set{anno=Anno,op={bif,Name},dst=Dst0,args=Args0}=I, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) -> + [Dst|Args] = beam_args([Dst0|Args0], St), + Line0 = call_line(body, {extfunc,erlang,Name,length(Args)}, Anno), + Fail = bif_fail(Fail0), + Line = case Fail of + {f,0} -> Line0; + {f,_} -> [] + end, + case is_gc_bif(Name, Args) of + true -> + Live = get_live(I), + Kill = kill_yregs(Anno, St), + {Kill++Line++[{gc_bif,Name,Fail,Live,Args,Dst}],St}; + false -> + {Line++[{bif,Name,Fail,Args,Dst}],St} + end; +cg_block([#cg_set{op={bif,tuple_size},dst=Arity0,args=[Tuple0]}, + #cg_set{op={bif,'=:='},dst=Bool,args=[Arity0,#b_literal{val=Ar}]}=Eq], + {Bool,Fail}=Context, St0) -> + Tuple = beam_arg(Tuple0, St0), + case beam_arg(Arity0, St0) of + {z,_} -> + %% The size will only be used once. Combine to a test_arity instruction. + Test = {test,test_arity,ensure_label(Fail, St0),[Tuple,Ar]}, + {[Test],St0}; + Arity -> + %% The size will be used more than once. Must do an explicit + %% BIF call followed by the '==' test. + TupleSize = {bif,tuple_size,{f,0},[Tuple],Arity}, + {Is,St} = cg_block([Eq], Context, St0), + {[TupleSize|Is],St} + end; +cg_block([#cg_set{op={bif,Name},dst=Dst0,args=Args0}]=Is0, {Dst0,Fail}, St0) -> + [Dst|Args] = beam_args([Dst0|Args0], St0), + case Dst of + {z,_} -> + %% The result of the BIF call will only be used once. Convert to + %% a test instruction. + {Test,St1} = bif_to_test(Name, Args, ensure_label(Fail, St0), St0), + {Test,St1}; + _ -> + %% Must explicitly call the BIF since the result will be used + %% more than once. + {Is1,St1} = cg_block(Is0, none, St0), + {Is2,St} = cg_block([], {Dst0,Fail}, St1), + {Is1++Is2,St} + end; +cg_block([#cg_set{anno=Anno,op={bif,Name},dst=Dst0,args=Args0}=I|T], + Context, St0) -> + [Dst|Args] = beam_args([Dst0|Args0], St0), + {Is0,St} = cg_block(T, Context, St0), + case is_gc_bif(Name, Args) of + true -> + Line = call_line(body, {extfunc,erlang,Name,length(Args)}, Anno), + Live = get_live(I), + Kill = kill_yregs(Anno, St), + Is = Kill++Line++[{gc_bif,Name,{f,0},Live,Args,Dst}|Is0], + {Is,St}; + false -> + Is = [{bif,Name,{f,0},Args,Dst}|Is0], + {Is,St} + end; +cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) -> + Fail = bif_fail(Fail0), + Line = line(Anno), + Alloc = map_get(alloc, Anno), + [#b_literal{val=Kind}|Args1] = Args0, + case Kind of + new -> + [Dst,Size,{integer,Unit}] = beam_args([Dst0|Args1], St), + Live = get_live(I), + {[Line|cg_bs_init(Dst, Size, Alloc, Unit, Live, Fail)],St}; + private_append -> + [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St), + Flags = {field_flags,[]}, + Is = [Line,{bs_private_append,Fail,Bits,Unit,Src,Flags,Dst}], + {Is,St}; + append -> + [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St), + Flags = {field_flags,[]}, + Live = get_live(I), + Is = [Line,{bs_append,Fail,Bits,Alloc,Live,Unit,Src,Flags,Dst}], + {Is,St} + end; +cg_block([#cg_set{anno=Anno,op=bs_start_match,dst=Ctx0,args=[Bin0]}=I, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) -> + [Dst,Bin1] = beam_args([Ctx0,Bin0], St), + {Bin,Pre} = force_reg(Bin1, Dst), + Live = get_live(I), + %% num_slots is only set when using the old instructions. + case maps:find(num_slots, Anno) of + {ok, Slots} -> + Is = Pre ++ [{test,bs_start_match2,Fail,Live,[Bin,Slots],Dst}], + {Is,St}; + error -> + Is = Pre ++ [{test,bs_start_match3,Fail,Live,[Bin],Dst}], + {Is,St} + end; +cg_block([#cg_set{op=bs_get}=Set, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) -> + {cg_bs_get(Fail, Set, St),St}; +cg_block([#cg_set{op=bs_match_string,args=[CtxVar,#b_literal{val=String}]}, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) -> + CtxReg = beam_arg(CtxVar, St), + Is = [{test,bs_match_string,Fail,[CtxReg,String]}], + {Is,St}; +cg_block([#cg_set{dst=Dst0,op=landingpad,args=Args0}|T], Context, St0) -> + [Dst,{atom,Kind},Tag] = beam_args([Dst0|Args0], St0), + case Kind of + 'catch' -> + cg_catch(Dst, T, Context, St0); + 'try' -> + cg_try(Dst, Tag, T, Context, St0) + end; +cg_block([#cg_set{op=kill_try_tag,args=Args0}|Is], Context, St0) -> + [Reg] = beam_args(Args0, St0), + {Is0,St} = cg_block(Is, Context, St0), + {[{try_end,Reg}|Is0],St}; +cg_block([#cg_set{op=catch_end,dst=Dst0,args=Args0}|Is], Context, St0) -> + [Dst,Reg,{x,0}] = beam_args([Dst0|Args0], St0), + {Is0,St} = cg_block(Is, Context, St0), + {[{catch_end,Reg}|copy({x,0}, Dst)++Is0],St}; +cg_block([#cg_set{op=call}=I, + #cg_set{op=succeeded,dst=Bool}], {Bool,_Fail}, St) -> + %% A call in try/catch block. + cg_block([I], none, St); +cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=I, + #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) -> + [Dst|Args] = beam_args([Dst0|Args0], St), + {cg_test(Op, bif_fail(Fail), Args, Dst, I),St}; +cg_block([#cg_set{op=bs_put,dst=Bool,args=Args0}], {Bool,Fail}, St) -> + Args = beam_args(Args0, St), + {cg_bs_put(bif_fail(Fail), Args),St}; +cg_block([#cg_set{op=bs_test_tail,dst=Bool,args=Args0}], {Bool,Fail}, St) -> + [Ctx,{integer,Bits}] = beam_args(Args0, St), + {[{test,bs_test_tail2,bif_fail(Fail),[Ctx,Bits]}],St}; +cg_block([#cg_set{op={float,checkerror},dst=Bool}], {Bool,Fail}, St) -> + {[{fcheckerror,bif_fail(Fail)}],St}; +cg_block([#cg_set{op=is_tagged_tuple,dst=Bool,args=Args0}], {Bool,Fail}, St) -> + [Src,{integer,Arity},Tag] = beam_args(Args0, St), + {[{test,is_tagged_tuple,ensure_label(Fail, St),[Src,Arity,Tag]}],St}; +cg_block([#cg_set{op=is_nonempty_list,dst=Bool,args=Args0}], {Bool,Fail}, St) -> + Args = beam_args(Args0, St), + {[{test,is_nonempty_list,ensure_label(Fail, St),Args}],St}; +cg_block([#cg_set{op=has_map_field,dst=Bool,args=Args0}], {Bool,Fail}, St) -> + [Src,Key] = beam_args(Args0, St), + {[{test,has_map_fields,Fail,Src,{list,[Key]}}],St}; +cg_block([#cg_set{op=call}=Call], {_Bool,_Fail}=Context, St0) -> + {Is0,St1} = cg_call(Call, body, none, St0), + {Is1,St} = cg_block([], Context, St1), + {Is0++Is1,St}; +cg_block([#cg_set{op=call,dst=Dst0}=Call], Context, St) -> + Dst = beam_arg(Dst0, St), + case Context of + {return,Dst,_} -> + cg_call(Call, tail, Context, St); + _ -> + cg_call(Call, body, Context, St) + end; +cg_block([#cg_set{op=call}=Call|T], Context, St0) -> + {Is0,St1} = cg_call(Call, body, none, St0), + {Is1,St} = cg_block(T, Context, St1), + {Is0++Is1,St}; +cg_block([#cg_set{op=make_fun,dst=Dst0,args=[Local|Args0]}|T], + Context, St0) -> + #b_local{name=#b_literal{val=Func},arity=Arity} = Local, + [Dst|Args] = beam_args([Dst0|Args0], St0), + {FuncLbl,St1} = local_func_label(Func, Arity, St0), + Is0 = setup_args(Args) ++ + [{make_fun2,{f,FuncLbl},0,0,length(Args)}|copy({x,0}, Dst)], + {Is1,St} = cg_block(T, Context, St1), + {Is0++Is1,St}; +cg_block([#cg_set{op=copy}|_]=T0, Context, St0) -> + {Is0,T} = cg_copy(T0, St0), + {Is1,St} = cg_block(T, Context, St0), + Is = Is0 ++ Is1, + case is_call(T) of + {yes,Arity} -> + {opt_call_moves(Is, Arity),St}; + no -> + {Is,St} + end; +cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=Set], none, St) -> + [Dst|Args] = beam_args([Dst0|Args0], St), + Is = cg_instr(Op, Args, Dst, Set), + {Is,St}; +cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=Set|T], Context, St0) -> + [Dst|Args] = beam_args([Dst0|Args0], St0), + Is0 = cg_instr(Op, Args, Dst, Set), + {Is1,St} = cg_block(T, Context, St0), + {Is0++Is1,St}; +cg_block([#cg_alloc{}=Alloc|T], Context, St0) -> + Is0 = cg_alloc(Alloc, St0), + {Is1,St} = cg_block(T, Context, St0), + {Is0++Is1,St}; +cg_block([], {return,Arg,none}, St) -> + Is = copy(Arg, {x,0}) ++ [return], + {Is,St}; +cg_block([], {return,Arg,N}, St) -> + Is = copy(Arg, {x,0}) ++ [{deallocate,N},return], + {Is,St}; +cg_block([], none, St) -> + {[],St}; +cg_block([], {Bool0,Fail}, St) -> + [Bool] = beam_args([Bool0], St), + {[{test,is_eq_exact,Fail,[Bool,{atom,true}]}],St}. + +cg_copy(T0, St) -> + {Copies,T} = splitwith(fun(#cg_set{op=copy}) -> true; + (_) -> false + end, T0), + Moves0 = cg_copy_1(Copies, St), + Moves1 = [Move || {move,Src,Dst}=Move <- Moves0, Src =/= Dst], + Scratch = {x,1022}, + Moves = order_moves(Moves1, Scratch), + {Moves,T}. + +cg_copy_1([#cg_set{dst=Dst0,args=Args}|T], St) -> + [Dst,Src] = beam_args([Dst0|Args], St), + Copies = cg_copy_1(T, St), + case keymember(Dst, 3, Copies) of + true -> + %% Will be overwritten. Don't generate a move instruction. + Copies; + false -> + [{move,Src,Dst}|Copies] + end; +cg_copy_1([], _St) -> []. + +-define(IS_LITERAL(Val), (Val =:= nil orelse + element(1, Val) =:= integer orelse + element(1, Val) =:= float orelse + element(1, Val) =:= atom orelse + element(1, Val) =:= literal)). + +bif_to_test('or', [V1,V2], {f,Lbl}=Fail, St0) when Lbl =/= 0 -> + {SuccLabel,St} = new_label(St0), + {[{test,is_eq_exact,{f,SuccLabel},[V1,{atom,false}]}, + {test,is_eq_exact,Fail,[V2,{atom,true}]}, + {label,SuccLabel}],St}; +bif_to_test(Op, Args, Fail, St) -> + {bif_to_test(Op, Args, Fail),St}. + +bif_to_test('and', [V1,V2], Fail) -> + [{test,is_eq_exact,Fail,[V1,{atom,true}]}, + {test,is_eq_exact,Fail,[V2,{atom,true}]}]; +bif_to_test('not', [Var], Fail) -> + [{test,is_eq_exact,Fail,[Var,{atom,false}]}]; +bif_to_test(Name, Args, Fail) -> + [bif_to_test_1(Name, Args, Fail)]. + +bif_to_test_1(is_atom, [_]=Ops, Fail) -> + {test,is_atom,Fail,Ops}; +bif_to_test_1(is_boolean, [_]=Ops, Fail) -> + {test,is_boolean,Fail,Ops}; +bif_to_test_1(is_binary, [_]=Ops, Fail) -> + {test,is_binary,Fail,Ops}; +bif_to_test_1(is_bitstring,[_]=Ops, Fail) -> + {test,is_bitstr,Fail,Ops}; +bif_to_test_1(is_float, [_]=Ops, Fail) -> + {test,is_float,Fail,Ops}; +bif_to_test_1(is_function, [_]=Ops, Fail) -> + {test,is_function,Fail,Ops}; +bif_to_test_1(is_function, [_,_]=Ops, Fail) -> + {test,is_function2,Fail,Ops}; +bif_to_test_1(is_integer, [_]=Ops, Fail) -> + {test,is_integer,Fail,Ops}; +bif_to_test_1(is_list, [_]=Ops, Fail) -> + {test,is_list,Fail,Ops}; +bif_to_test_1(is_map, [_]=Ops, Fail) -> + {test,is_map,Fail,Ops}; +bif_to_test_1(is_number, [_]=Ops, Fail) -> + {test,is_number,Fail,Ops}; +bif_to_test_1(is_pid, [_]=Ops, Fail) -> + {test,is_pid,Fail,Ops}; +bif_to_test_1(is_port, [_]=Ops, Fail) -> + {test,is_port,Fail,Ops}; +bif_to_test_1(is_reference, [_]=Ops, Fail) -> + {test,is_reference,Fail,Ops}; +bif_to_test_1(is_tuple, [_]=Ops, Fail) -> + {test,is_tuple,Fail,Ops}; +bif_to_test_1('=<', [A,B], Fail) -> + {test,is_ge,Fail,[B,A]}; +bif_to_test_1('>', [A,B], Fail) -> + {test,is_lt,Fail,[B,A]}; +bif_to_test_1('<', [_,_]=Ops, Fail) -> + {test,is_lt,Fail,Ops}; +bif_to_test_1('>=', [_,_]=Ops, Fail) -> + {test,is_ge,Fail,Ops}; +bif_to_test_1('==', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_eq,Fail,[A,C]}; +bif_to_test_1('==', [_,_]=Ops, Fail) -> + {test,is_eq,Fail,Ops}; +bif_to_test_1('/=', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_ne,Fail,[A,C]}; +bif_to_test_1('/=', [_,_]=Ops, Fail) -> + {test,is_ne,Fail,Ops}; +bif_to_test_1('=:=', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_eq_exact,Fail,[A,C]}; +bif_to_test_1('=:=', [_,_]=Ops, Fail) -> + {test,is_eq_exact,Fail,Ops}; +bif_to_test_1('=/=', [C,A], Fail) when ?IS_LITERAL(C) -> + {test,is_ne_exact,Fail,[A,C]}; +bif_to_test_1('=/=', [_,_]=Ops, Fail) -> + {test,is_ne_exact,Fail,Ops}. + +opt_call_moves(Is0, Arity) -> + {Moves0,Is} = splitwith(fun({move,_,_}) -> true; + ({kill,_}) -> true; + (_) -> false + end, Is0), + Moves = opt_call_moves_1(Moves0, Arity), + Moves ++ Is. + +opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1|[{kill,_}|_]=Is], Arity) -> + %% There could be a {move,Tmp,{x,0}} instruction after the + %% kill/1 instructions (moved to there by opt_move_to_x0/1). + case splitwith(fun({kill,_}) -> true; + (_) -> false + end, Is) of + {Kills,[{move,{x,_}=Tmp,{x,0}}=M2]} -> + %% The two move/2 instructions (M1 and M2) can be combined + %% to one. The question is, though, is it safe to place + %% them after the kill/1 instructions? + case is_killed(Src, Kills, Arity) of + true -> + %% Src (a Y register) is killed by one of the + %% kill/1 instructions. Thus M1 and M2 + %% must be placed before the kill/1 instructions + %% (essentially undoing what opt_move_to_x0/1 + %% did, which turned out to be a pessimization + %% in this case). + opt_call_moves_1([M1,M2|Kills], Arity); + false -> + %% Src is not killed by any of the kill/1 + %% instructions. Thus it is safe to place + %% M1 and M2 after the kill/1 instructions. + opt_call_moves_1(Kills++[M1,M2], Arity) + end; + {_,_} -> + [M1|Is] + end; +opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1,{move,Tmp,Dst}=M2|Is], Arity) -> + case is_killed(Tmp, Is, Arity) of + true -> + %% The X register Tmp is never used again. We can collapse + %% the two move instruction into one. + [{move,Src,Dst}|opt_call_moves_1(Is, Arity)]; + false -> + [M1|opt_call_moves_1([M2|Is], Arity)] + end; +opt_call_moves_1([M|Ms], Arity) -> + [M|opt_call_moves_1(Ms, Arity)]; +opt_call_moves_1([], _Arity) -> []. + +is_killed(Y, [{kill,Y}|_], _) -> + true; +is_killed(R, [{kill,_}|Is], Arity) -> + is_killed(R, Is, Arity); +is_killed(R, [{move,R,_}|_], _) -> + false; +is_killed(R, [{move,_,R}|_], _) -> + true; +is_killed(R, [{move,_,_}|Is], Arity) -> + is_killed(R, Is, Arity); +is_killed({x,X}, [], Arity) -> + X >= Arity; +is_killed({y,_}, [], _) -> + false. + +cg_alloc(#cg_alloc{stack=none,words=#need{h=0,f=0}}, _St) -> + []; +cg_alloc(#cg_alloc{stack=none,words=Need,live=Live}, _St) -> + [{test_heap,alloc(Need),Live}]; +cg_alloc(#cg_alloc{stack=Stk,words=Need,live=Live,def_yregs=DefYregs}, + #cg{regs=Regs}) when is_integer(Stk) -> + Alloc = alloc(Need), + All = [{y,Y} || Y <- lists:seq(0, Stk-1)], + Def = ordsets:from_list([maps:get(V, Regs) || V <- DefYregs]), + NeedInit = ordsets:subtract(All, Def), + NoZero = length(Def)*2 > Stk, + I = case {NoZero,Alloc} of + {true,0} -> {allocate,Stk,Live}; + {true,_} -> {allocate_heap,Stk,Alloc,Live}; + {false,0} -> {allocate_zero,Stk,Live}; + {false,_} -> {allocate_heap_zero,Stk,Alloc,Live} + end, + [I|case NoZero of + true -> [{init,Y} || Y <- NeedInit]; + false -> [] + end]. + +alloc(#need{h=Words,f=0}) -> + Words; +alloc(#need{h=Words,f=Floats}) -> + {alloc,[{words,Words},{floats,Floats}]}. + +is_call([#cg_set{op=call,args=[#b_var{}|Args]}|_]) -> + {yes,1+length(Args)}; +is_call([#cg_set{op=call,args=[_|Args]}|_]) -> + {yes,length(Args)}; +is_call([#cg_set{op=make_fun,args=[_|Args]}|_]) -> + {yes,length(Args)}; +is_call(_) -> + no. + +cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=[#b_local{}=Func0|Args0]}, + Where, Context, St0) -> + [Dst|Args] = beam_args([Dst0|Args0], St0), + #b_local{name=Name0,arity=Arity} = Func0, + {atom,Name} = beam_arg(Name0, St0), + {FuncLbl,St} = local_func_label(Name, Arity, St0), + Line = call_line(Where, local, Anno), + Call = build_call(call, Arity, {f,FuncLbl}, Context, Dst), + Is = setup_args(Args, Anno, Context, St) ++ Line ++ Call, + case Anno of + #{ result_type := Info } -> + {Is ++ [{'%', {type_info, Dst, Info}}], St}; + #{} -> + {Is, St} + end; +cg_call(#cg_set{anno=Anno0,op=call,dst=Dst0,args=[#b_remote{}=Func0|Args0]}, + Where, Context, St) -> + [Dst|Args] = beam_args([Dst0|Args0], St), + #b_remote{mod=Mod0,name=Name0,arity=Arity} = Func0, + case {beam_arg(Mod0, St),beam_arg(Name0, St)} of + {{atom,Mod},{atom,Name}} -> + Func = {extfunc,Mod,Name,Arity}, + Line = call_line(Where, Func, Anno0), + Call = build_call(call_ext, Arity, Func, Context, Dst), + Anno = case erl_bifs:is_exit_bif(Mod, Name, Arity) of + true -> + %% There is no need to kill Y registers + %% before calling an exit BIF. + maps:remove(kill_yregs, Anno0); + false -> + Anno0 + end, + Is = setup_args(Args, Anno, Context, St) ++ Line ++ Call, + {Is,St}; + {Mod,Name} -> + Apply = build_apply(Arity, Context, Dst), + Is = setup_args(Args++[Mod,Name], Anno0, Context, St) ++ + [line(Anno0)] ++ Apply, + {Is,St} + end; +cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=Args0}, + Where, Context, St) -> + [Dst,Func|Args] = beam_args([Dst0|Args0], St), + Line = call_line(Where, Func, Anno), + Arity = length(Args), + Call = build_call(call_fun, Arity, Func, Context, Dst), + Is = setup_args(Args++[Func], Anno, Context, St) ++ Line ++ Call, + {Is,St}. + +build_call(call_fun, Arity, _Func, none, Dst) -> + [{call_fun,Arity}|copy({x,0}, Dst)]; +build_call(call_fun, Arity, _Func, {return,Dst,N}, Dst) when is_integer(N) -> + [{call_fun,Arity},{deallocate,N},return]; +build_call(call_fun, Arity, _Func, {return,Val,N}, _Dst) when is_integer(N) -> + [{call_fun,Arity},{move,Val,{x,0}},{deallocate,N},return]; +build_call(call_ext, 2, {extfunc,erlang,'!',2}, none, Dst) -> + [send|copy({x,0}, Dst)]; +build_call(call_ext, 2, {extfunc,erlang,'!',2}, {return,Dst,N}, Dst) + when is_integer(N) -> + [send,{deallocate,N},return]; +build_call(Prefix, Arity, Func, {return,Dst,none}, Dst) -> + I = case Prefix of + call -> call_only; + call_ext -> call_ext_only + end, + [{I,Arity,Func}]; +build_call(call_ext, Arity, {extfunc,Mod,Name,Arity}=Func, {return,_,none}, _Dst) -> + true = erl_bifs:is_exit_bif(Mod, Name, Arity), %Assertion. + [{call_ext_only,Arity,Func}]; +build_call(Prefix, Arity, Func, {return,Dst,N}, Dst) when is_integer(N) -> + I = case Prefix of + call -> call_last; + call_ext -> call_ext_last + end, + [{I,Arity,Func,N}]; +build_call(I, Arity, Func, {return,Val,N}, _Dst) when is_integer(N) -> + [{I,Arity,Func}|copy(Val, {x,0})++[{deallocate,N},return]]; +build_call(I, Arity, Func, none, Dst) -> + [{I,Arity,Func}|copy({x,0}, Dst)]. + +build_apply(Arity, {return,Dst,N}, Dst) when is_integer(N) -> + [{apply_last,Arity,N}]; +build_apply(Arity, {return,Val,N}, _Dst) when is_integer(N) -> + [{apply,Arity}|copy(Val, {x,0})++[{deallocate,N},return]]; +build_apply(Arity, none, Dst) -> + [{apply,Arity}|copy({x,0}, Dst)]. + +cg_instr(put_map, [{atom,assoc},SrcMap|Ss], Dst, Set) -> + Live = get_live(Set), + [{put_map_assoc,{f,0},SrcMap,Dst,Live,{list,Ss}}]; +cg_instr(bs_get_tail, [Src], Dst, Set) -> + Live = get_live(Set), + [{bs_get_tail,Src,Dst,Live}]; +cg_instr(bs_get_position, [Ctx], Dst, Set) -> + Live = get_live(Set), + [{bs_get_position,Ctx,Dst,Live}]; +cg_instr(Op, Args, Dst, _Set) -> + cg_instr(Op, Args, Dst). + +cg_instr(bs_init_writable, Args, Dst) -> + setup_args(Args) ++ [bs_init_writable|copy({x,0}, Dst)]; +cg_instr(bs_restore, [Ctx,Slot], _Dst) -> + case Slot of + {integer,N} -> + [{bs_restore2,Ctx,N}]; + {atom,start} -> + [{bs_restore2,Ctx,Slot}] + end; +cg_instr(bs_save, [Ctx,Slot], _Dst) -> + {integer,N} = Slot, + [{bs_save2,Ctx,N}]; +cg_instr(bs_set_position, [Ctx,Pos], _Dst) -> + [{bs_set_position,Ctx,Pos}]; +cg_instr(build_stacktrace, Args, Dst) -> + setup_args(Args) ++ [build_stacktrace|copy({x,0}, Dst)]; +cg_instr(set_tuple_element=Op, [New,Tuple,{integer,Index}], _Dst) -> + [{Op,New,Tuple,Index}]; +cg_instr({float,clearerror}, [], _Dst) -> + [fclearerror]; +cg_instr({float,get}, [Src], Dst) -> + [{fmove,Src,Dst}]; +cg_instr({float,put}, [Src], Dst) -> + [{fmove,Src,Dst}]; +cg_instr(get_hd=Op, [Src], Dst) -> + [{Op,Src,Dst}]; +cg_instr(get_tl=Op, [Src], Dst) -> + [{Op,Src,Dst}]; +cg_instr(get_tuple_element=Op, [Src,{integer,N}], Dst) -> + [{Op,Src,N,Dst}]; +cg_instr(put_list=Op, [Hd,Tl], Dst) -> + [{Op,Hd,Tl,Dst}]; +cg_instr(put_tuple, Elements, Dst) -> + [{put_tuple2,Dst,{list,Elements}}]; +cg_instr(put_tuple_arity, [{integer,Arity}], Dst) -> + [{put_tuple,Arity,Dst}]; +cg_instr(put_tuple_elements, Elements, _Dst) -> + [{put,E} || E <- Elements]; +cg_instr(raw_raise, Args, Dst) -> + setup_args(Args) ++ [raw_raise|copy({x,0}, Dst)]; +cg_instr(remove_message, [], _Dst) -> + [remove_message]; +cg_instr(resume, [A,B], _Dst) -> + [{bif,raise,{f,0},[A,B],{x,0}}]; +cg_instr(timeout, [], _Dst) -> + [timeout]. + +cg_test(bs_add=Op, Fail, [Src1,Src2,{integer,Unit}], Dst, _I) -> + [{Op,Fail,[Src1,Src2,Unit],Dst}]; +cg_test(bs_skip, Fail, Args, _Dst, I) -> + cg_bs_skip(Fail, Args, I); +cg_test(bs_utf8_size=Op, Fail, [Src], Dst, _I) -> + [{Op,Fail,Src,Dst}]; +cg_test(bs_utf16_size=Op, Fail, [Src], Dst, _I) -> + [{Op,Fail,Src,Dst}]; +cg_test({float,convert}, Fail, [Src], Dst, _I) -> + {f,0} = Fail, %Assertion. + [{fconv,Src,Dst}]; +cg_test({float,Op0}, Fail, Args, Dst, #cg_set{anno=Anno}) -> + Op = case Op0 of + '+' -> fadd; + '-' when length(Args) =:= 2 -> fsub; + '-' -> fnegate; + '*' -> fmul; + '/' -> fdiv + end, + [line(Anno),{bif,Op,Fail,Args,Dst}]; +cg_test(get_map_element, Fail, [Map,Key], Dst, _I) -> + [{get_map_elements,Fail,Map,{list,[Key,Dst]}}]; +cg_test(peek_message, Fail, [], Dst, _I) -> + [{loop_rec,Fail,{x,0}}|copy({x,0}, Dst)]; +cg_test(put_map, Fail, [{atom,exact},SrcMap|Ss], Dst, Set) -> + Live = get_live(Set), + [{put_map_exact,Fail,SrcMap,Dst,Live,{list,Ss}}]; +cg_test(wait_timeout, Fail, [Timeout], _Dst, _) -> + case Timeout of + {atom,infinity} -> + [{wait,Fail}]; + _ -> + [{wait_timeout,Fail,Timeout}] + end. + +cg_bs_get(Fail, #cg_set{dst=Dst0,args=[#b_literal{val=Type}|Ss0]}=Set, St) -> + Op = case Type of + integer -> bs_get_integer2; + float -> bs_get_float2; + binary -> bs_get_binary2; + utf8 -> bs_get_utf8; + utf16 -> bs_get_utf16; + utf32 -> bs_get_utf32 + end, + [Dst|Ss1] = beam_args([Dst0|Ss0], St), + Ss = case Ss1 of + [Ctx,{literal,Flags},Size,{integer,Unit}] -> + %% Plain integer/float/binary. + [Ctx,Size,Unit,field_flags(Flags, Set)]; + [Ctx,{literal,Flags}] -> + %% Utf8/16/32. + [Ctx,field_flags(Flags, Set)] + end, + Live = get_live(Set), + [{test,Op,Fail,Live,Ss,Dst}]. + +cg_bs_skip(Fail, [{atom,Type}|Ss0], Set) -> + Op = case Type of + utf8 -> bs_skip_utf8; + utf16 -> bs_skip_utf16; + utf32 -> bs_skip_utf32; + _ -> bs_skip_bits2 + end, + Live = get_live(Set), + Ss = case Ss0 of + [Ctx,{literal,Flags},Size,{integer,Unit}] -> + %% Plain integer/float/binary. + [Ctx,Size,Unit,field_flags(Flags, Set)]; + [Ctx,{literal,Flags}] -> + %% Utf8/16/32. + [Ctx,Live,field_flags(Flags, Set)] + end, + case {Type,Ss} of + {binary,[_,{atom,all},1,_]} -> + []; + {binary,[R,{atom,all},U,_]} -> + [{test,bs_test_unit,Fail,[R,U]}]; + {_,_} -> + [{test,Op,Fail,Ss}] + end. + +field_flags(Flags, #cg_set{anno=#{location:={File,Line}}}) -> + {field_flags,[{anno,[Line,{file,File}]}|Flags]}; +field_flags(Flags, _) -> + {field_flags,Flags}. + +cg_bs_put(Fail, [{atom,Type},{literal,Flags}|Args]) -> + Op = case Type of + integer -> bs_put_integer; + float -> bs_put_float; + binary -> bs_put_binary; + utf8 -> bs_put_utf8; + utf16 -> bs_put_utf16; + utf32 -> bs_put_utf32 + end, + case Args of + [Src,Size,{integer,Unit}] -> + [{Op,Fail,Size,Unit,{field_flags,Flags},Src}]; + [Src] -> + [{Op,Fail,{field_flags,Flags},Src}] + end. + +cg_bs_init(Dst, Size0, Alloc, Unit, Live, Fail) -> + Op = case Unit of + 1 -> bs_init_bits; + 8 -> bs_init2 + end, + Size = cg_bs_init_size(Size0), + [{Op,Fail,Size,Alloc,Live,{field_flags,[]},Dst}]. + +cg_bs_init_size({x,_}=R) -> R; +cg_bs_init_size({y,_}=R) -> R; +cg_bs_init_size({integer,Int}) -> Int. + +cg_catch(Agg, T0, Context, St0) -> + {Moves,T1} = cg_extract(T0, Agg, St0), + {T,St} = cg_block(T1, Context, St0), + {Moves++T,St}. + +cg_try(Agg, Tag, T0, Context, St0) -> + {Moves0,T1} = cg_extract(T0, Agg, St0), + Moves = order_moves(Moves0, {x,3}), + [#cg_set{op=kill_try_tag}|T2] = T1, + {T,St} = cg_block(T2, Context, St0), + {[{try_case,Tag}|Moves++T],St}. + +cg_extract([#cg_set{op=extract,dst=Dst0,args=Args0}|Is0], Agg, St) -> + [Dst,Agg,{integer,X}] = beam_args([Dst0|Args0], St), + {Ds,Is} = cg_extract(Is0, Agg, St), + case keymember(Dst, 3, Ds) of + true -> + %% This destination will be overwritten. + {Ds,Is}; + false -> + {copy({x,X}, Dst)++Ds,Is} + end; +cg_extract(Is, _, _) -> + {[],Is}. + +copy(Src, Src) -> []; +copy(Src, Dst) -> [{move,Src,Dst}]. + +force_reg({literal,_}=Lit, Reg) -> + {Reg,[{move,Lit,Reg}]}; +force_reg({integer,_}=Lit, Reg) -> + {Reg,[{move,Lit,Reg}]}; +force_reg({atom,_}=Lit, Reg) -> + {Reg,[{move,Lit,Reg}]}; +force_reg({float,_}=Lit, Reg) -> + {Reg,[{move,Lit,Reg}]}; +force_reg(nil=Lit, Reg) -> + {Reg,[{move,Lit,Reg}]}; +force_reg({Kind,_}=R, _) when Kind =:= x; Kind =:= y -> + {R,[]}. + +%% successors(Terminator) -> [Successor]. +%% Return an ordset of all successors for the given terminator. + +successors(#cg_br{succ=Succ,fail=Fail}) -> + ordsets:from_list([Succ,Fail]); +successors(#cg_switch{fail=Fail,list=List}) -> + ordsets:from_list([Fail|[Lbl || {_,Lbl} <- List]]); +successors(#cg_ret{}) -> []. + +%% linearize(Blocks) -> [{BlockLabel,#cg_blk{}}]. +%% Linearize the intermediate representation of the code. Also +%% translate blocks from the SSA records to internal record types +%% used only in this module. + +linearize(Blocks) -> + Linear = beam_ssa:linearize(Blocks), + linearize_1(Linear, Blocks). + +linearize_1([{?BADARG_BLOCK,_}|Ls], Blocks) -> + linearize_1(Ls, Blocks); +linearize_1([{L,Block0}|Ls], Blocks) -> + Block = translate_block(L, Block0, Blocks), + [{L,Block}|linearize_1(Ls, Blocks)]; +linearize_1([], _Blocks) -> []. + +%% translate_block(BlockLabel, #b_blk{}, Blocks) -> #cg_blk{}. +%% Translate a block to the internal records used in this module. +%% Also eliminate phi nodes, replacing them with 'copy' instructions +%% in the predecessor blocks. + +translate_block(L, #b_blk{anno=Anno,is=Is0,last=Last0}, Blocks) -> + Last = translate_terminator(Last0), + PhiCopies = translate_phis(L, Last, Blocks), + Is1 = translate_is(Is0, PhiCopies), + Is = case Anno of + #{frame_size:=Size} -> + Alloc = #cg_alloc{stack=Size}, + [Alloc|Is1]; + #{} -> Is1 + end, + #cg_blk{anno=Anno,is=Is,last=Last}. + +translate_is([#b_set{op=phi}|Is], Tail) -> + translate_is(Is, Tail); +translate_is([#b_set{anno=Anno0,op=Op,dst=Dst,args=Args}=I|Is], Tail) -> + Anno = case beam_ssa:clobbers_xregs(I) of + true -> Anno0#{clobbers=>true}; + false -> Anno0 + end, + [#cg_set{anno=Anno,op=Op,dst=Dst,args=Args}|translate_is(Is, Tail)]; +translate_is([], Tail) -> Tail. + +translate_terminator(#b_ret{anno=Anno,arg=Arg}) -> + Dealloc = case Anno of + #{deallocate:=N} -> N; + #{} -> none + end, + #cg_ret{arg=Arg,dealloc=Dealloc}; +translate_terminator(#b_br{bool=#b_literal{val=true},succ=Succ}) -> + #cg_br{bool=#b_literal{val=true},succ=Succ,fail=Succ}; +translate_terminator(#b_br{bool=#b_literal{val=false},fail=Fail}) -> + #cg_br{bool=#b_literal{val=true},succ=Fail,fail=Fail}; +translate_terminator(#b_br{bool=Bool,succ=Succ,fail=Fail}) -> + #cg_br{bool=Bool,succ=Succ,fail=Fail}; +translate_terminator(#b_switch{arg=Bool,fail=Fail,list=List}) -> + #cg_switch{arg=Bool,fail=Fail,list=List}. + +translate_phis(L, #cg_br{succ=Target,fail=Target}, Blocks) -> + #b_blk{is=Is} = maps:get(Target, Blocks), + Phis = takewhile(fun(#b_set{op=phi}) -> true; + (#b_set{}) -> false + end, Is), + phi_copies(Phis, L); +translate_phis(_, _, _) -> []. + +phi_copies([#b_set{dst=Dst,args=PhiArgs}|Sets], L) -> + CopyArgs = [V || {V,Target} <- PhiArgs, Target =:= L], + [#cg_set{op=copy,dst=Dst,args=CopyArgs}|phi_copies(Sets, L)]; +phi_copies([], _) -> []. + +%% opt_move_to_x0([Instruction]) -> [Instruction]. +%% Simple peep-hole optimization to move a {move,Any,{x,0}} past +%% any kill up to the next call instruction. (To give the loader +%% an opportunity to combine the 'move' and the 'call' instructions.) + +opt_move_to_x0(Moves) -> + opt_move_to_x0(Moves, []). + +opt_move_to_x0([{move,_,{x,0}}=I|Is0], Acc0) -> + case move_past_kill(Is0, I, Acc0) of + impossible -> opt_move_to_x0(Is0, [I|Acc0]); + {Is,Acc} -> opt_move_to_x0(Is, Acc) + end; +opt_move_to_x0([I|Is], Acc) -> + opt_move_to_x0(Is, [I|Acc]); +opt_move_to_x0([], Acc) -> reverse(Acc). + +move_past_kill([{kill,Src}|_], {move,Src,_}, _) -> + impossible; +move_past_kill([{kill,_}=I|Is], Move, Acc) -> + move_past_kill(Is, Move, [I|Acc]); +move_past_kill(Is, Move, Acc) -> + {Is,[Move|Acc]}. + +%% setup_args(Args, Anno, Context) -> [Instruction]. +%% setup_args(Args) -> [Instruction]. +%% Set up X registers for a call. + +setup_args(Args, Anno, none, St) -> + case {setup_args(Args),kill_yregs(Anno, St)} of + {Moves,[]} -> + Moves; + {Moves,Kills} -> + opt_move_to_x0(Moves ++ Kills) + end; +setup_args(Args, _, _, _) -> + setup_args(Args). + +setup_args([]) -> + []; +setup_args([_|_]=Args) -> + Moves = gen_moves(Args, 0, []), + Scratch = {x,1+last(sort([length(Args)-1|[X || {x,X} <- Args]]))}, + order_moves(Moves, Scratch). + +%% kill_yregs(Anno, #cg{}) -> [{kill,{y,Y}}]. +%% Kill Y registers that will not be used again. + +kill_yregs(#{kill_yregs:=Kill}, #cg{regs=Regs}) -> + ordsets:from_list([{kill,maps:get(V, Regs)} || V <- Kill]); +kill_yregs(#{}, #cg{}) -> []. + +%% gen_moves(As, I, Acc) +%% Generate the basic move instruction to move the arguments +%% to their proper registers. The list will be sorted on +%% destinations. (I.e. the move to {x,0} will be first -- +%% see the comment to order_moves/2.) + +gen_moves([A|As], I, Acc) -> + gen_moves(As, I+1, copy(A, {x,I}) ++ Acc); +gen_moves([], _, Acc) -> + keysort(3, Acc). + +%% order_moves([Move], ScratchReg) -> [Move] +%% Orders move instruction so that source registers are not +%% destroyed before they are used. If there are cycles +%% (such as {move,{x,0},{x,1}}, {move,{x,1},{x,1}}), +%% the scratch register is used to break up the cycle. +%% If possible, the first move of the input list is placed +%% last in the result list (to make the move to {x,0} occur +%% just before the call to allow the Beam loader to coalesce +%% the instructions). + +order_moves(Ms, Scr) -> order_moves(Ms, Scr, []). + +order_moves([{move,_,_}=M|Ms0], ScrReg, Acc0) -> + {Chain,Ms} = collect_chain(Ms0, [M], ScrReg), + Acc = reverse(Chain, Acc0), + order_moves(Ms, ScrReg, Acc); +order_moves([], _, Acc) -> Acc. + +collect_chain(Ms, Path, ScrReg) -> + collect_chain(Ms, Path, [], ScrReg). + +collect_chain([{move,Src,Same}=M|Ms0], [{move,Same,_}|_]=Path, Others, ScrReg) -> + case keymember(Src, 3, Path) of + false -> + collect_chain(reverse(Others, Ms0), [M|Path], [], ScrReg); + true -> + %% There is a cycle, which we must break up. + {break_up_cycle(M, Path, ScrReg),reverse(Others, Ms0)} + end; +collect_chain([M|Ms], Path, Others, ScrReg) -> + collect_chain(Ms, Path, [M|Others], ScrReg); +collect_chain([], Path, Others, _) -> + {Path,Others}. + +break_up_cycle({move,Src,_}=M, Path, ScrReg) -> + [{move,ScrReg,Src},M|break_up_cycle1(Src, Path, ScrReg)]. + +break_up_cycle1(Dst, [{move,Src,Dst}|Path], ScrReg) -> + [{move,Src,ScrReg}|Path]; +break_up_cycle1(Dst, [M|Path], LastMove) -> + [M|break_up_cycle1(Dst, Path, LastMove)]. + +%%% +%%% General utility functions. +%%% + +verify_beam_register({x,_}=Reg) -> Reg. + +is_beam_register({x,_}) -> true; +is_beam_register(_) -> false. + +get_register(V, Regs) -> + case is_beam_register(V) of + true -> V; + false -> maps:get(V, Regs) + end. + +beam_args(As, St) -> + [beam_arg(A, St) || A <- As]. + +beam_arg(#b_var{}=Name, #cg{regs=Regs}) -> + maps:get(Name, Regs); +beam_arg(#b_literal{val=Val}, _) -> + if + is_atom(Val) -> {atom,Val}; + is_float(Val) -> {float,Val}; + is_integer(Val) -> {integer,Val}; + Val =:= [] -> nil; + true -> {literal,Val} + end; +beam_arg(Reg, _) -> + verify_beam_register(Reg). + +new_block_label(L, St0) -> + {_Lbl,St} = label_for_block(L, St0), + St. + +def_block_label(L, #cg{labels=Labels,used_labels=Used}) -> + Lbl = maps:get(L, Labels), + case gb_sets:is_member(Lbl, Used) of + false -> []; + true -> [{label,Lbl}] + end. + +use_block_labels(Ls, St) -> + mapfoldl(fun use_block_label/2, St, Ls). + +use_block_label(L, #cg{used_labels=Used,catches=Catches}=St0) -> + {Lbl,St} = label_for_block(L, St0), + case gb_sets:is_member(L, Catches) of + true -> + {{catch_tag,{f,Lbl}}, + St#cg{used_labels=gb_sets:add(Lbl, Used)}}; + false -> + {{f,Lbl},St#cg{used_labels=gb_sets:add(Lbl, Used)}} + end. + +label_for_block(L, #cg{labels=Labels0}=St0) -> + case Labels0 of + #{L:=Lbl} -> + {Lbl,St0}; + #{} -> + {Lbl,St} = new_label(St0), + Labels = Labels0#{L=>Lbl}, + {Lbl,St#cg{labels=Labels}} + end. + +%% local_func_label(Name, Arity, State) -> {Label,State'} +%% local_func_label({Name,Arity}, State) -> {Label,State'} +%% Get the function entry label for a local function. + +local_func_label(Name, Arity, St) -> + local_func_label({Name,Arity}, St). + +local_func_label(Key, #cg{functable=Map}=St0) -> + case Map of + #{Key := Label} -> + {Label,St0}; + _ -> + {Label,St} = new_label(St0), + {Label,St#cg{functable=Map#{Key => Label}}} + end. + +%% is_gc_bif(Name, Args) -> true|false. +%% Determines whether the BIF Name/Arity might do a GC. + +-spec is_gc_bif(atom(), [beam_ssa:value()]) -> boolean(). + +is_gc_bif(hd, [_]) -> false; +is_gc_bif(tl, [_]) -> false; +is_gc_bif(self, []) -> false; +is_gc_bif(node, []) -> false; +is_gc_bif(node, [_]) -> false; +is_gc_bif(element, [_,_]) -> false; +is_gc_bif(get, [_]) -> false; +is_gc_bif(is_map_key, [_,_]) -> false; +is_gc_bif(map_get, [_,_]) -> false; +is_gc_bif(tuple_size, [_]) -> false; +is_gc_bif(Bif, Args) -> + Arity = length(Args), + not (erl_internal:bool_op(Bif, Arity) orelse + erl_internal:new_type_test(Bif, Arity) orelse + erl_internal:comp_op(Bif, Arity)). + +%% new_label(St) -> {L,St}. + +new_label(#cg{lcount=Next}=St) -> + {Next,St#cg{lcount=Next+1}}. + +%% call_line(tail|body, Func, Anno) -> [] | [{line,...}]. +%% Produce a line instruction if it will be needed by the +%% call to Func. + +call_line(_Context, {extfunc,Mod,Name,Arity}, Anno) -> + case erl_bifs:is_safe(Mod, Name, Arity) of + false -> + %% The call could be to a BIF. + %% We'll need a line instruction in case the + %% BIF call fails. + [line(Anno)]; + true -> + %% Call to a safe BIF. Since it cannot fail, + %% we don't need any line instruction here. + [] + end; +call_line(body, _, Anno) -> + [line(Anno)]; +call_line(tail, local, _) -> + %% Tail-recursive call to a local function. A line + %% instruction will not be useful. + []; +call_line(tail, _, Anno) -> + %% Call to a fun. + [line(Anno)]. + +%% line(Le) -> {line,[] | {location,File,Line}} +%% Create a line instruction, containing information about +%% the current filename and line number. A line information +%% instruction should be placed before any operation that could +%% cause an exception. + +line(#{location:={File,Line}}) -> + {line,[{location,File,Line}]}; +line(#{}) -> + {line,[]}. + +flatmapfoldl(F, Accu0, [Hd|Tail]) -> + {R,Accu1} = F(Hd, Accu0), + {Rs,Accu2} = flatmapfoldl(F, Accu1, Tail), + {R++Rs,Accu2}; +flatmapfoldl(_, Accu, []) -> {[],Accu}. |