diff options
author | Björn Gustavsson <[email protected]> | 2017-12-18 13:24:59 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2017-12-18 13:24:59 +0100 |
commit | 41f397d27f574c50dd917942ed2b481f2ff3bae7 (patch) | |
tree | f24932a1e106405412cec2eaf5820f1d0476955e /lib/compiler/src | |
parent | 6266f0a257f13b71287e92c54903e22c2f92ba96 (diff) | |
parent | b89044a800c4950072b14d5e372cb2db55f1966c (diff) | |
download | otp-41f397d27f574c50dd917942ed2b481f2ff3bae7.tar.gz otp-41f397d27f574c50dd917942ed2b481f2ff3bae7.tar.bz2 otp-41f397d27f574c50dd917942ed2b481f2ff3bae7.zip |
Merge pull request #1658 from bjorng/bjorn/compiler/delay-stackframe
Delay creation of stack frames
Diffstat (limited to 'lib/compiler/src')
-rw-r--r-- | lib/compiler/src/beam_block.erl | 89 | ||||
-rw-r--r-- | lib/compiler/src/beam_record.erl | 126 | ||||
-rw-r--r-- | lib/compiler/src/beam_utils.erl | 268 | ||||
-rw-r--r-- | lib/compiler/src/v3_codegen.erl | 94 |
4 files changed, 457 insertions, 120 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 79c4b651db..fe1ce6f60b 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -37,13 +37,14 @@ function({function,Name,Arity,CLabel,Is0}) -> %% Collect basic blocks and optimize them. Is1 = blockify(Is0), Is2 = embed_lines(Is1), - Is3 = move_allocates(Is2), - Is4 = beam_utils:live_opt(Is3), - Is5 = opt_blocks(Is4), - Is6 = beam_utils:delete_live_annos(Is5), - - %% Done. - {function,Name,Arity,CLabel,Is6} + Is3 = beam_utils:anno_defs(Is2), + Is4 = move_allocates(Is3), + Is5 = beam_utils:live_opt(Is4), + Is6 = opt_blocks(Is5), + Is = beam_utils:delete_live_annos(Is6), + + %% Done. + {function,Name,Arity,CLabel,Is} catch Class:Error:Stack -> io:fwrite("Function: ~w/~w\n", [Name,Arity]), @@ -172,7 +173,7 @@ find_fixpoint(OptFun, Is0) -> %% safe to assume that if x(N) is initialized, then all lower-numbered %% x registers are also initialized. %% -%% For example, in general it is not safe to transform the following +%% For example, we must be careful when transforming the following %% instructions: %% %% get_tuple_element x(0) Element => x(1) @@ -184,13 +185,9 @@ find_fixpoint(OptFun, Is0) -> %% get_tuple_element x(0) Element => x(1) %% %% The transformation is safe if and only if x(1) has been -%% initialized previously. Unfortunately, beam_reorder may have moved -%% a get_tuple_element instruction so that x(1) is not always -%% initialized when this code is reached. To find whether or not x(1) -%% is initialized, we would need to analyze all code preceding these -%% two instructions (across branches). Since we currently don't have -%% any practical mechanism for doing that, we will have to -%% conservatively assume that the transformation is unsafe. +%% initialized previously. We will use the annotations added by +%% beam_utils:anno_defs/1 to determine whether x(a) has been +%% initialized. move_allocates([{block,Bl0}|Is]) -> Bl = move_allocates_1(reverse(Bl0), []), @@ -199,15 +196,20 @@ move_allocates([I|Is]) -> [I|move_allocates(Is)]; move_allocates([]) -> []. +move_allocates_1([{'%def',_}|Is], Acc) -> + move_allocates_1(Is, Acc); move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info}}|Acc]=Acc0) -> - case {alloc_may_pass(I),alloc_live_regs(I, Live0)} of - {false,_} -> - move_allocates_1(Is, [I|Acc0]); - {true,not_possible} -> - move_allocates_1(Is, [I|Acc0]); - {true,Live} when is_integer(Live) -> - A = {set,[],[],{alloc,Live,Info}}, - move_allocates_1(Is, [A,I|Acc]) + case alloc_may_pass(I) of + false -> + move_allocates_1(Is, [I|Acc0]); + true -> + case alloc_live_regs(I, Is, Live0) of + not_possible -> + move_allocates_1(Is, [I|Acc0]); + Live when is_integer(Live) -> + A = {set,[],[],{alloc,Live,Info}}, + move_allocates_1(Is, [A,I|Acc]) + end end; move_allocates_1([I|Is], Acc) -> move_allocates_1(Is, [I|Acc]); @@ -471,16 +473,34 @@ count_ones(Bits, Acc) -> %% Calculate the new number of live registers when we move an allocate %% instruction upwards, passing a 'set' instruction. -alloc_live_regs({set,Ds,Ss,_}, Regs0) -> +alloc_live_regs({set,Ds,Ss,_}, Is, Regs0) -> Rset = x_live(Ss, x_dead(Ds, (1 bsl Regs0)-1)), - live_regs(0, Rset). + Live = live_regs(0, Rset), + case ensure_contiguous(Rset, Live) of + not_possible -> + %% Liveness information (looking forward in the + %% instruction stream) can't prove that moving this + %% allocation instruction is safe. Now use the annotation + %% of defined registers at the beginning of the current + %% block to see whether moving would be safe. + Def0 = defined_regs(Is, 0), + Def = Def0 band ((1 bsl Live) - 1), + ensure_contiguous(Rset bor Def, Live); + Live -> + %% Safe based on liveness information. + Live + end. live_regs(N, 0) -> N; -live_regs(N, Regs) when Regs band 1 =:= 1 -> - live_regs(N+1, Regs bsr 1); -live_regs(_, _) -> - not_possible. +live_regs(N, Regs) -> + live_regs(N+1, Regs bsr 1). + +ensure_contiguous(Regs, Live) -> + case (1 bsl Live) - 1 of + Regs -> Live; + _ -> not_possible + end. x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N))); x_dead([_|Rs], Regs) -> x_dead(Rs, Regs); @@ -489,3 +509,14 @@ x_dead([], Regs) -> Regs. x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N)); x_live([_|Rs], Regs) -> x_live(Rs, Regs); x_live([], Regs) -> Regs. + +%% defined_regs(ReversedInstructions) -> RegBitmap. +%% Given a reversed instruction stream, determine the +%% the registers that are defined. + +defined_regs([{'%def',Def}|_], Regs) -> + Def bor Regs; +defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) -> + x_live(Ds, Regs bor ((1 bsl Live) - 1)); +defined_regs([{set,Ds,_,_}|Is], Regs) -> + defined_regs(Is, x_live(Ds, Regs)). diff --git a/lib/compiler/src/beam_record.erl b/lib/compiler/src/beam_record.erl index e4775ca6a4..db1053e48c 100644 --- a/lib/compiler/src/beam_record.erl +++ b/lib/compiler/src/beam_record.erl @@ -15,19 +15,12 @@ %% %% %CopyrightEnd% %% -%% File: beam_record.erl -%% Author: Björn-Egil Dahlberg -%% Created: 2014-09-03 -%% - --module(beam_record). --export([module/2]). %% Rewrite the instruction stream on tagged tuple tests. -%% Tagged tuples means a tuple of any arity with an atom as its first element. -%% Typically records, ok-tuples and error-tuples. -%% -%% from: +%% Tagged tuples means a tuple of any arity with an atom as its +%% first element, such as records and error tuples. +%% +%% From: %% ... %% {test,is_tuple,Fail,[Src]}. %% {test,test_arity,Fail,[Src,Sz]}. @@ -36,13 +29,16 @@ %% ... %% {test,is_eq_exact,Fail,[Dst,Atom]}. %% ... -%% to: +%% To: %% ... %% {test,is_tagged_tuple,Fail,[Src,Sz,Atom]}. %% ... +%% +-module(beam_record). +-export([module/2]). --import(lists, [reverse/1]). +-import(lists, [reverse/1,reverse/2]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. @@ -51,55 +47,85 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> Fs = [function(F) || F <- Fs0], {ok,{Mod,Exp,Attr,Fs,Lc}}. -function({function,Name,Arity,CLabel,Is}) -> +function({function,Name,Arity,CLabel,Is0}) -> try - Idx = beam_utils:index_labels(Is), - {function,Name,Arity,CLabel,rewrite(Is,Idx)} + Is1 = beam_utils:anno_defs(Is0), + Idx = beam_utils:index_labels(Is1), + Is = rewrite(reverse(Is1), Idx), + {function,Name,Arity,CLabel,Is} catch Class:Error:Stack -> io:fwrite("Function: ~w/~w\n", [Name,Arity]), erlang:raise(Class, Error, Stack) end. -rewrite(Is,Idx) -> - rewrite(Is,Idx,[]). +rewrite(Is, Idx) -> + rewrite(Is, Idx, 0, []). -rewrite([{test,is_tuple,Fail,[Src]}=I1, - {test,test_arity,Fail,[Src,N]}=I2|Is],Idx,Acc) -> - case is_tagged_tuple(Is,Fail,Src,Idx) of +rewrite([{test,test_arity,Fail,[Src,N]}=TA, + {test,is_tuple,Fail,[Src]}=TT|Is], Idx, Def, Acc0) -> + case is_tagged_tuple(Acc0, Def, Fail, Src, Idx) of no -> - rewrite(Is,Idx,[I2,I1|Acc]); - {Atom,[{block,[]}|Is1]} -> - rewrite(Is1,Idx,[{test,is_tagged_tuple,Fail,[Src,N,Atom]}|Acc]); - {Atom,Is1} -> - rewrite(Is1,Idx,[{test,is_tagged_tuple,Fail,[Src,N,Atom]}|Acc]) + rewrite(Is, Idx, 0, [TT,TA|Acc0]); + {yes,Atom,Acc} -> + I = {test,is_tagged_tuple,Fail,[Src,N,Atom]}, + rewrite(Is, Idx, Def, [I|Acc]) end; -rewrite([I|Is],Idx,Acc) -> - rewrite(Is,Idx,[I|Acc]); -rewrite([],_,Acc) -> reverse(Acc). - -is_tagged_tuple([{block,[{set,[Dst],[Src],{get_tuple_element,0}}=B|Bs]}, - {test,is_eq_exact,Fail,[Dst,{atom,_}=Atom]}|Is],Fail,Src,Idx) -> +rewrite([{block,[{'%def',Def}|Bl]}|Is], Idx, _Def, Acc) -> + rewrite(Is, Idx, Def, [{block,Bl}|Acc]); +rewrite([{label,L}=I|Is], Idx0, Def, Acc) -> + Idx = beam_utils:index_label(L, Acc, Idx0), + rewrite(Is, Idx, Def, [I|Acc]); +rewrite([I|Is], Idx, Def, Acc) -> + rewrite(Is, Idx, Def, [I|Acc]); +rewrite([], _, _, Acc) -> Acc. - %% if Dst is killed in the instruction stream and at fail label, - %% we can safely remove get_tuple_element. - %% - %% if Dst is not killed in the stream, we cannot remove get_tuple_element - %% since it is referenced. - - case is_killed(Dst,Is,Fail,Idx) of - true -> {Atom,[{block,Bs}|Is]}; - false -> {Atom,[{block,[B|Bs]}|Is]} +is_tagged_tuple([{block,Bl}, + {test,is_eq_exact,Fail,[Dst,{atom,_}=Atom]}|Is], + Def, Fail, Src, Idx) -> + case is_tagged_tuple_1(Bl, Is, Fail, Src, Dst, Idx, Def, []) of + no -> + no; + {yes,[]} -> + {yes,Atom,Is}; + {yes,[_|_]=Block} -> + {yes,Atom,[{block,Block}|Is]} end; -is_tagged_tuple([{block,[{set,_,_,_}=B|Bs]}, - {test,is_eq_exact,_,_}=I|Is],Fail,Src,Idx) -> - case is_tagged_tuple([{block,Bs},I|Is],Fail,Src,Idx) of - {Atom,[{block,Bsr}|Isr]} -> {Atom,[{block,[B|Bsr]}|Isr]}; - no -> no +is_tagged_tuple(_, _, _, _, _) -> + no. + +is_tagged_tuple_1([{set,[Dst],[Src],{get_tuple_element,0}}=I|Bl], + Is, Fail, Src, Dst, Idx, Def, Acc) -> + %% Check usage of Dst to find out whether the get_tuple_element + %% is needed. + case usage(Dst, Is, Fail, Idx) of + killed -> + %% Safe to remove the get_tuple_element instruction. + {yes,reverse(Acc, Bl)}; + used -> + %% Actively used. Must keep instruction. + {yes,reverse(Acc, [I|Bl])}; + not_used -> + %% Not actually used (but must be initialized). + case is_defined(Dst, Def) of + false -> + %% Dst must be initialized, but the + %% actual value does not matter. + Kill = {set,[Dst],[nil],move}, + {yes,reverse(Acc, [Kill|Bl])}; + true -> + %% The register is previously initialized. + %% We can remove the instruction. + {yes,reverse(Acc, Bl)} + end end; -is_tagged_tuple(_Is,_Fail,_Src,_Idx) -> +is_tagged_tuple_1([I|Bl], Is, Fail, Src, Dst, Idx, Def, Acc) -> + is_tagged_tuple_1(Bl, Is, Fail, Src, Dst, Idx, Def, [I|Acc]); +is_tagged_tuple_1(_, _, _, _, _, _, _, _) -> no. -is_killed(Dst,Is,{_,Lbl},Idx) -> - beam_utils:is_killed(Dst,Is,Idx) andalso - beam_utils:is_killed_at(Dst,Lbl,Idx). +usage(Dst, Is, Fail, Idx) -> + beam_utils:usage(Dst, [{test,is_number,Fail,[nil]}|Is], Idx). + +is_defined({x,X}, Def) -> + (Def bsr X) band 1 =:= 1. diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index 941ae95110..e61d6a43b4 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -22,11 +22,13 @@ -module(beam_utils). -export([is_killed_block/2,is_killed/3,is_killed_at/3, - is_not_used/3, + is_not_used/3,usage/3, empty_label_index/0,index_label/3,index_labels/1,replace_labels/4, code_at/2,bif_to_test/3,is_pure_test/1, live_opt/1,delete_live_annos/1,combine_heap_needs/2, - split_even/1]). + anno_defs/1, + split_even/1 + ]). -export_type([code_index/0,module_code/0,instruction/0]). @@ -60,6 +62,23 @@ {lbl :: code_index(), %Label to code index. res :: result_cache()}). %Result cache for each label. +%% usage(Register, [Instruction], State) -> killed|not_used|used. +%% Determine the usage of Register in the instruction sequence. +%% The return value is one of: +%% +%% killed - The register is not used in any way. +%% not_used - The register is referenced only by an allocating instruction +%% (the actual value does not matter). +%% used - The register is used (its value do matter). + +-spec usage(beam_asm:reg(), [instruction()], code_index()) -> + 'killed' | 'not_used' | 'used'. + +usage(R, Is, D) -> + St = #live{lbl=D,res=gb_trees:empty()}, + {Usage,_} = check_liveness(R, Is, St), + Usage. + %% is_killed_block(Register, [Instruction]) -> true|false %% Determine whether a register is killed by the instruction sequence inside @@ -293,6 +312,23 @@ combine_heap_needs(H1, H2) -> combine_alloc_lists([H1,H2]). +%% anno_defs(Instructions) -> Instructions' +%% Add {'%def',RegisterBitmap} annotations to the beginning of +%% each block. Iff bit X is set in the the bitmap, it means +%% that {x,X} is defined when the block is entered. + +-spec anno_defs([instruction()]) -> [instruction()]. + +anno_defs(Is0) -> + {Bef,[Fi|Is1]} = + splitwith(fun({func_info,_,_,_}) -> false; + (_) -> true + end, Is0), + {func_info,_,_,Arity} = Fi, + Regs = init_def_regs(Arity), + Is = defs(Is1, Regs, #{}), + Bef ++ [Fi|Is]. + %% split_even/1 %% [1,2,3,4,5,6] -> {[1,3,5],[2,4,6]} @@ -300,7 +336,6 @@ combine_heap_needs(H1, H2) -> split_even(Rs) -> split_even(Rs, [], []). - %%% %%% Local functions. %%% @@ -318,6 +353,10 @@ check_liveness(R, [{block,Blk}|Is], St0) -> case check_liveness_block(R, Blk, St0) of {transparent,St1} -> check_liveness(R, Is, St1); + {alloc_used,St1} -> + %% Used by an allocating instruction, but value not referenced. + %% Must check the rest of the instructions. + not_used(check_liveness(R, Is, St1)); {Other,_}=Res when is_atom(Other) -> Res end; @@ -376,9 +415,16 @@ check_liveness(R, [{bs_init,_,_,none,Ss,Dst}|Is], St) -> check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) -> case R of {x,X} -> - case X < Live orelse member(R, Ss) of - true -> {used,St}; - false -> {killed,St} + case member(R, Ss) of + true -> + {used,St}; + false -> + if + X < Live -> + not_used(check_liveness(R, Is, St)); + true -> + {killed,St} + end end; {y,_} -> case member(R, Ss) of @@ -588,7 +634,7 @@ check_liveness_ret(R, R, St) -> {used,St}; check_liveness_ret(_, _, St) -> {killed,St}. %% check_liveness_block(Reg, [Instruction], State) -> -%% {killed | not_used | used | transparent,State'} +%% {killed | not_used | used | alloc_used | transparent,State'} %% Finds out how Reg is used in the instruction sequence inside a block. %% Returns one of: %% killed - Reg is assigned a new value or killed by an @@ -596,6 +642,7 @@ check_liveness_ret(_, _, St) -> {killed,St}. %% not_used - The value is not used, but the register is referenced %% e.g. by an allocation instruction %% transparent - Reg is neither used nor killed +%% alloc_used - Used only in an allocate instruction %% used - Reg is explicitly used by an instruction %% %% '%live' annotations are not allowed. @@ -609,7 +656,7 @@ check_liveness_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], St0) -> true -> case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of {killed,St} -> {not_used,St}; - {transparent,St} -> {not_used,St}; + {transparent,St} -> {alloc_used,St}; {_,_}=Res -> Res end end; @@ -949,3 +996,208 @@ split_even([], Ss, Ds) -> {reverse(Ss),reverse(Ds)}; split_even([S,D|Rs], Ss, Ds) -> split_even(Rs, [S|Ss], [D|Ds]). + +%%% +%%% Add annotations for defined registers. +%%% +%%% This analysis is done by scanning the instructions in +%%% execution order. +%%% + +defs([{apply,_}=I|Is], _Regs, D) -> + [I|defs(Is, 1, D)]; +defs([{bif,_,{f,Fail},_Src,Dst}=I|Is], Regs0, D) -> + Regs = def_regs([Dst], Regs0), + [I|defs(Is, Regs, update_regs(Fail, Regs0, D))]; +defs([{block,Block0}|Is], Regs0, D0) -> + {Block,Regs,D} = defs_list(Block0, Regs0, D0), + [{block,[{'%def',Regs0}|Block]}|defs(Is, Regs, D)]; +defs([{bs_init,{f,L},_,_,_,Dst}=I|Is], Regs0, D) -> + Regs = def_regs([Dst], Regs0), + [I|defs(Is, Regs, update_regs(L, Regs, D))]; +defs([{bs_put,{f,L},_,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, update_regs(L, Regs, D))]; +defs([build_stacktrace=I|Is], _Regs, D) -> + [I|defs(Is, 1, D)]; +defs([{call,_,_}=I|Is], _Regs, D) -> + [I|defs(Is, 1, D)]; +defs([{call_ext,_,{extfunc,M,F,A}}=I|Is], _Regs, D) -> + case erl_bifs:is_exit_bif(M, F, A) of + false -> + [I|defs(Is, 1, D)]; + true -> + [I|defs_unreachable(Is, D)] + end; +defs([{call_ext,_,_}=I|Is], _Regs, D) -> + [I|defs(Is, 1, D)]; +defs([{call_fun,_}=I|Is], _Regs, D) -> + [I|defs(Is, 1, D)]; +defs([{'catch',_,{f,L}}=I|Is], Regs, D) -> + RegsAtLabel = init_def_regs(1), + [I|defs(Is, Regs, update_regs(L, RegsAtLabel, D))]; +defs([{catch_end,_}=I|Is], _Regs, D) -> + Regs = init_def_regs(1), + [I|defs(Is, Regs, D)]; +defs([{gc_bif,_,{f,Fail},Live,_Src,Dst}=I|Is], Regs0, D) -> + true = all_defined(Live, Regs0), %Assertion. + Regs = def_regs([Dst], init_def_regs(Live)), + [I|defs(Is, Regs, update_regs(Fail, Regs0, D))]; +defs([{get_map_elements,{f,L},_Src,{list,DstList}}=I|Is], Regs0, D) -> + {_,Ds} = beam_utils:split_even(DstList), + Regs = def_regs(Ds, Regs0), + [I|defs(Is, Regs, update_regs(L, Regs0, D))]; +defs([{get_tuple_element,_,_,Dst}=I|Is], Regs0, D) -> + Regs = def_regs([Dst], Regs0), + [I|defs(Is, Regs, D)]; +defs([{jump,{f,L}}=I|Is], Regs, D) -> + [I|defs_unreachable(Is, update_regs(L, Regs, D))]; +defs([{label,L}=I|Is], Regs0, D) -> + case D of + #{L:=Regs1} -> + Regs = Regs0 band Regs1, + [I|defs(Is, Regs, D)]; + #{} -> + [I|defs(Is, Regs0, D)] + end; +defs([{loop_rec,{f,L},{x,0}}=I|Is], _Regs, D0) -> + RegsAtLabel = init_def_regs(0), + D = update_regs(L, RegsAtLabel, D0), + [I|defs(Is, init_def_regs(1), D)]; +defs([{loop_rec_end,_}=I|Is], _Regs, D) -> + [I|defs(Is, 0, D)]; +defs([{make_fun2,_,_,_,_}=I|Is], _Regs, D) -> + [I|defs(Is, 1, D)]; +defs([{move,_,Dst}=I|Is], Regs0, D) -> + Regs = def_regs([Dst], Regs0), + [I|defs(Is, Regs, D)]; +defs([{put_map,{f,Fail},_,_,Dst,_,_}=I|Is], Regs0, D) -> + Regs = def_regs([Dst], Regs0), + [I|defs(Is, Regs, update_regs(Fail, Regs0, D))]; +defs([return=I|Is], _Regs, D) -> + [I|defs_unreachable(Is, D)]; +defs([{select,_,_Src,Fail,List}=I|Is], Regs, D0) -> + D = update_list([Fail|List], Regs, D0), + [I|defs_unreachable(Is, D)]; +defs([{test,_,{f,L},_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, update_regs(L, Regs, D))]; +defs([{test,_,{f,L},Live,_,Dst}=I|Is], Regs0, D) -> + true = all_defined(Live, Regs0), %Assertion. + Regs = def_regs([Dst], init_def_regs(Live)), + [I|defs(Is, Regs, update_regs(L, Regs0, D))]; +defs([{'try',_,{f,L}}=I|Is], Regs, D) -> + RegsAtLabel = init_def_regs(3), + [I|defs(Is, Regs, update_regs(L, RegsAtLabel, D))]; +defs([{try_case,_}=I|Is], _Regs, D) -> + [I|defs(Is, init_def_regs(3), D)]; +defs([{wait,_}=I|Is], _Regs, D) -> + [I|defs_unreachable(Is, D)]; +defs([{wait_timeout,_,_}=I|Is], _Regs, D) -> + [I|defs(Is, 0, D)]; + +%% Exceptions. +defs([{badmatch,_}=I|Is], _Regs, D) -> + [I|defs_unreachable(Is, D)]; +defs([{case_end,_}=I|Is], _Regs, D) -> + [I|defs_unreachable(Is, D)]; +defs([if_end=I|Is], _Regs, D) -> + [I|defs_unreachable(Is, D)]; +defs([{try_case_end,_}=I|Is], _Regs, D) -> + [I|defs_unreachable(Is, D)]; + +%% Neutral instructions +defs([{bs_context_to_binary,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([{bs_restore2,_,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([{bs_save2,_,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([{deallocate,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([{kill,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([{line,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([{recv_mark,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([{recv_set,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([timeout=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([{trim,_,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([{try_end,_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([{'%',_}=I|Is], Regs, D) -> + [I|defs(Is, Regs, D)]; +defs([], _, _) -> []. + +defs_unreachable([{label,L}=I|Is], D) -> + case D of + #{L:=Regs} -> + [I|defs(Is, Regs, D)]; + #{} -> + defs_unreachable(Is, D) + end; +defs_unreachable([_|Is], D) -> + defs_unreachable(Is, D); +defs_unreachable([], _D) -> []. + +defs_list(Is, Regs, D) -> + defs_list(Is, Regs, D, []). + +defs_list([{set,Ds,_,{alloc,Live,Info}}=I|Is], Regs0, D0, Acc) -> + true = all_defined(Live, Regs0), %Assertion. + D = case Info of + {gc_bif,_,{f,Fail}} -> + update_regs(Fail, Regs0, D0); + {put_map,_,{f,Fail}} -> + update_regs(Fail, Regs0, D0); + _ -> + D0 + end, + Regs = def_regs(Ds, init_def_regs(Live)), + defs_list(Is, Regs, D, [I|Acc]); +defs_list([{set,Ds,_,Info}=I|Is], Regs0, D0, Acc) -> + D = case Info of + {bif,_,{f,Fail}} -> + update_regs(Fail, Regs0, D0); + {try_catch,'catch',{f,Fail}} -> + update_regs(Fail, init_def_regs(1), D0); + {try_catch,'try',{f,Fail}} -> + update_regs(Fail, init_def_regs(3), D0); + _ -> + D0 + end, + Regs = def_regs(Ds, Regs0), + defs_list(Is, Regs, D, [I|Acc]); +defs_list([], Regs, D, Acc) -> + {reverse(Acc),Regs,D}. + +init_def_regs(Arity) -> + (1 bsl Arity) - 1. + +def_regs([{x,X}|T], Regs) -> + def_regs(T, Regs bor (1 bsl X)); +def_regs([_|T], Regs) -> + def_regs(T, Regs); +def_regs([], Regs) -> Regs. + +update_list([{f,L}|T], Regs, D0) -> + D = update_regs(L, Regs, D0), + update_list(T, Regs, D); +update_list([_|T], Regs, D) -> + update_list(T, Regs, D); +update_list([], _Regs, D) -> D. + +update_regs(L, Regs0, D) -> + case D of + #{L:=Regs1} -> + Regs = Regs0 band Regs1, + D#{L:=Regs}; + #{} -> + D#{L=>Regs0} + end. + +all_defined(Live, Regs) -> + All = (1 bsl Live) - 1, + Regs band All =:= All. diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl index 8189420c1c..62c6c54b9f 100644 --- a/lib/compiler/src/v3_codegen.erl +++ b/lib/compiler/src/v3_codegen.erl @@ -446,7 +446,7 @@ cg_fun(Les, Hvs, Vdb, AtomMod, NameArity, Anno, St0) -> put_reg(V, Reg) end, [], Hvs), stk=[]}, 0, Vdb), - {B,_Aft,St} = cg_list(Les, 0, Vdb, Bef, + {B,_Aft,St} = cg_list(Les, Vdb, Bef, St3#cg{bfail=0, ultimate_failure=UltimateMatchFail, is_top_block=true}), @@ -498,19 +498,19 @@ cg(#cg_need_heap{h=H}, _Vdb, Bef, St) -> %% cg_list([Kexpr], FirstI, Vdb, StackReg, St) -> {[Ainstr],StackReg,St}. -cg_list(Kes, I, Vdb, Bef, St0) -> +cg_list(Kes, Vdb, Bef, St0) -> {Keis,{Aft,St1}} = flatmapfoldl(fun (Ke, {Inta,Sta}) -> {Keis,Intb,Stb} = cg(Ke, Vdb, Inta, Sta), {Keis,{Intb,Stb}} - end, {Bef,St0}, need_heap(Kes, I)), + end, {Bef,St0}, need_heap(Kes)), {Keis,Aft,St1}. %% need_heap([Lkexpr], I, St) -> [Lkexpr]. %% Insert need_heap instructions in Kexpr list. Try to be smart and %% collect them together as much as possible. -need_heap(Kes0, _I) -> +need_heap(Kes0) -> {Kes,H} = need_heap_0(reverse(Kes0), 0, []), %% Prepend need_heap if necessary. @@ -741,26 +741,55 @@ block_cg(Es, Le, _Vdb, Bef, St) -> block_cg(Es, Le, Bef, St). block_cg(Es, Le, Bef, #cg{is_top_block=false}=St) -> - cg_block(Es, Le#l.i, Le#l.vdb, Bef, St); -block_cg(Es, Le, Bef, St0) -> - {Is0,Aft,St} = cg_block(Es, Le#l.i, Le#l.vdb, Bef, - St0#cg{is_top_block=false,need_frame=false}), - Is = top_level_block(Is0, Aft, max_reg(Bef#sr.reg), St), - {Is,Aft,St#cg{is_top_block=true}}. - -cg_block([], _I, _Vdb, Bef, St0) -> + cg_block(Es, Le#l.vdb, Bef, St); +block_cg(Es, Le, Bef, #cg{is_top_block=true}=St0) -> + %% No stack frame has been established yet. Do we need one? + case need_stackframe(Es) of + true -> + %% We need a stack frame. Generate the code and add the + %% code for creating and deallocating the stack frame. + {Is0,Aft,St} = cg_block(Es, Le#l.vdb, Bef, + St0#cg{is_top_block=false,need_frame=false}), + Is = top_level_block(Is0, Aft, max_reg(Bef#sr.reg), St), + {Is,Aft,St#cg{is_top_block=true}}; + false -> + %% This sequence of instructions ending in a #k_match{} (a + %% 'case' or 'if') in the Erlang code does not need a + %% stack frame yet. Delay the creation (if a stack frame + %% is needed at all, it will be created inside the + %% #k_match{}). + cg_list(Es, Le#l.vdb, Bef, St0) + end. + +%% need_stackframe([Kexpr]) -> true|false. +%% Does this list of instructions need a stack frame? +%% +%% A sequence of instructions that don't clobber the X registers +%% followed by a single #k_match{} doesn't need a stack frame. + +need_stackframe([H|T]) -> + case H of + #k_bif{op=#k_internal{}} -> true; + #k_put{arg=#k_binary{}} -> true; + #k_bif{} -> need_stackframe(T); + #k_put{} -> need_stackframe(T); + #k_guard_match{} -> need_stackframe(T); + #k_match{} when T =:= [] -> false; + _ -> true + end; +need_stackframe([]) -> false. + +cg_block([], _Vdb, Bef, St0) -> {[],Bef,St0}; -cg_block(Kes0, I, Vdb, Bef, St0) -> +cg_block(Kes0, Vdb, Bef, St0) -> {Kes2,Int1,St1} = case basic_block(Kes0) of {Kes1,LastI,Args,Rest} -> - Ke = hd(Kes1), - #l{i=Fb} = get_kanno(Ke), - cg_basic_block(Kes1, Fb, LastI, Args, Vdb, Bef, St0); + cg_basic_block(Kes1, LastI, Args, Vdb, Bef, St0); {Kes1,Rest} -> - cg_list(Kes1, I, Vdb, Bef, St0) + cg_list(Kes1, Vdb, Bef, St0) end, - {Kes3,Int2,St2} = cg_block(Rest, I, Vdb, Int1, St1), + {Kes3,Int2,St2} = cg_block(Rest, Vdb, Int1, St1), {Kes2 ++ Kes3,Int2,St2}. basic_block(Kes) -> basic_block(Kes, []). @@ -838,12 +867,12 @@ func_vars(_) -> []. %% save_carefully/4 during code generation to only save the variables %% that can be saved without growing the stack frame. -cg_basic_block(Kes, Fb, Lf, As, Vdb, Bef, St0) -> +cg_basic_block(Kes, Lf, As, Vdb, Bef, St0) -> Int0 = reserve_arg_regs(As, Bef), Int = extend_stack(Int0, Lf, Lf+1, Vdb), {Keis,{Aft,St1}} = flatmapfoldl(fun(Ke, St) -> cg_basic_block(Ke, St, Lf, Vdb) end, - {Int,St0}, need_heap(Kes, Fb)), + {Int,St0}, need_heap(Kes)), {Keis,Aft,St1}. cg_basic_block(#cg_need_heap{}=Ke, {Bef,St0}, _Lf, Vdb) -> @@ -1478,8 +1507,8 @@ guard_clause_cg(#k_guard_clause{anno=#l{vdb=Vdb},guard=G,body=B}, Fail, Bef, St0 %% the correct exit point. Primops and tests all go to the next %% instruction on success or jump to a failure label. -guard_cg(#k_protected{arg=Ts,ret=Rs,anno=#l{i=I,vdb=Pdb}}, Fail, _Vdb, Bef, St) -> - protected_cg(Ts, Rs, Fail, I, Pdb, Bef, St); +guard_cg(#k_protected{arg=Ts,ret=Rs,anno=#l{vdb=Pdb}}, Fail, _Vdb, Bef, St) -> + protected_cg(Ts, Rs, Fail, Pdb, Bef, St); guard_cg(#k_test{anno=#l{i=I},op=Test0,args=As,inverted=Inverted}, Fail, Vdb, Bef, St0) -> #k_remote{mod=#k_atom{val=erlang},name=#k_atom{val=Test}} = Test0, @@ -1500,13 +1529,13 @@ guard_cg(G, _Fail, Vdb, Bef, St) -> %% guard_cg_list([Kexpr], Fail, I, Vdb, StackReg, St) -> %% {[Ainstr],StackReg,St}. -guard_cg_list(Kes, Fail, I, Vdb, Bef, St0) -> +guard_cg_list(Kes, Fail, Vdb, Bef, St0) -> {Keis,{Aft,St1}} = flatmapfoldl(fun (Ke, {Inta,Sta}) -> {Keis,Intb,Stb} = guard_cg(Ke, Fail, Vdb, Inta, Sta), {Keis,{Intb,Stb}} - end, {Bef,St0}, need_heap(Kes, I)), + end, {Bef,St0}, need_heap(Kes)), {Keis,Aft,St1}. %% protected_cg([Kexpr], [Ret], Fail, I, Vdb, Bef, St) -> {[Ainstr],Aft,St}. @@ -1516,15 +1545,14 @@ guard_cg_list(Kes, Fail, I, Vdb, Bef, St0) -> %% return values then these must be set to 'false' on failure, %% control always passes to the next instruction. -protected_cg(Ts, [], Fail, I, Vdb, Bef, St0) -> +protected_cg(Ts, [], Fail, Vdb, Bef, St0) -> %% Protect these calls, revert when done. - {Tis,Aft,St1} = guard_cg_list(Ts, Fail, I, Vdb, Bef, - St0#cg{bfail=Fail}), + {Tis,Aft,St1} = guard_cg_list(Ts, Fail, Vdb, Bef, St0#cg{bfail=Fail}), {Tis,Aft,St1#cg{bfail=St0#cg.bfail}}; -protected_cg(Ts, Rs, _Fail, I, Vdb, Bef, St0) -> +protected_cg(Ts, Rs, _Fail, Vdb, Bef, St0) -> {Pfail,St1} = new_label(St0), {Psucc,St2} = new_label(St1), - {Tis,Aft,St3} = guard_cg_list(Ts, Pfail, I, Vdb, Bef, + {Tis,Aft,St3} = guard_cg_list(Ts, Pfail, Vdb, Bef, St2#cg{bfail=Pfail}), %%ok = io:fwrite("cg ~w: ~p~n", [?LINE,{Rs,I,Vdb,Aft}]), %% Set return values to false. @@ -1896,17 +1924,17 @@ cg_recv_wait(#k_atom{val=infinity}, #cg_block{anno=Le,es=Tes}, I, Bef, St0) -> %% But to keep the stack and register information up to date, %% we will generate the code for the 'after' body, and then discard it. Int1 = clear_dead(Bef, I, Le#l.vdb), - {_,Int2,St1} = cg_block(Tes, Le#l.i, Le#l.vdb, + {_,Int2,St1} = cg_block(Tes, Le#l.vdb, Int1#sr{reg=clear_regs(Int1#sr.reg)}, St0), {[{wait,{f,St1#cg.recv}}],Int2,St1}; cg_recv_wait(#k_int{val=0}, #cg_block{anno=Le,es=Tes}, _I, Bef, St0) -> - {Tis,Int,St1} = cg_block(Tes, Le#l.i, Le#l.vdb, Bef, St0), + {Tis,Int,St1} = cg_block(Tes, Le#l.vdb, Bef, St0), {[timeout|Tis],Int,St1}; cg_recv_wait(Te, #cg_block{anno=Le,es=Tes}, I, Bef, St0) -> Reg = cg_reg_arg(Te, Bef), %% Must have empty registers here! Bug if anything in registers. Int0 = clear_dead(Bef, I, Le#l.vdb), - {Tis,Int,St1} = cg_block(Tes, Le#l.i, Le#l.vdb, + {Tis,Int,St1} = cg_block(Tes, Le#l.vdb, Int0#sr{reg=clear_regs(Int0#sr.reg)}, St0), {[{wait_timeout,{f,St1#cg.recv},Reg},timeout] ++ Tis,Int,St1}. @@ -1967,7 +1995,7 @@ catch_cg(#cg_block{es=C}, #k_var{name=R}, Le, Vdb, Bef, St0) -> CatchTag = Le#l.i, Int1 = Bef#sr{stk=put_catch(CatchTag, Bef#sr.stk)}, CatchReg = fetch_stack({catch_tag,CatchTag}, Int1#sr.stk), - {Cis,Int2,St2} = cg_block(C, Le#l.i, Le#l.vdb, Int1, + {Cis,Int2,St2} = cg_block(C, Le#l.vdb, Int1, St1#cg{break=B,in_catch=true}), [] = Int2#sr.reg, %Assertion. Aft = Int2#sr{reg=[{0,R}],stk=drop_catch(CatchTag, Int2#sr.stk)}, |