%% -*- erlang-indent-level: 2 -*- %% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2005-2009. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. %% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. %% %% %CopyrightEnd% %% %% Floating point handling. -ifdef(HIPE_AMD64). -define(HIPE_X86_X87, hipe_amd64_x87). -define(HIPE_X86_DEFUSE, hipe_amd64_defuse). -define(HIPE_X86_LIVENESS, hipe_amd64_liveness). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -else. -define(HIPE_X86_X87, hipe_x86_x87). -define(HIPE_X86_DEFUSE, hipe_x86_defuse). -define(HIPE_X86_LIVENESS, hipe_x86_liveness). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -endif. -module(?HIPE_X86_X87). -export([map/1]). -include("../x86/hipe_x86.hrl"). -include("../main/hipe.hrl"). %%---------------------------------------------------------------------- map(Defun) -> CFG0 = hipe_x86_cfg:init(Defun), %% hipe_x86_cfg:pp(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), StartLabel = hipe_x86_cfg:start_label(CFG0), {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()), hipe_x86_cfg:linearise(CFG1). do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) -> case gb_trees:lookup(Lbl, BlockMap) of none -> %% This block has not been visited. Block = hipe_x86_cfg:bb(CFG, Lbl), Succ = hipe_x86_cfg:succ(CFG, Lbl), NewBlockMap = gb_trees:insert(Lbl, Map, BlockMap), LiveOut = [X || X <- ?HIPE_X86_LIVENESS:liveout(Liveness, Lbl), is_fp(X)], Code = hipe_bb:code(Block), ReverseCode = lists:reverse(Code), {NewCode0, NewMap, NewBlockMap1, Dirty} = do_block(ReverseCode, LiveOut, Map, NewBlockMap), NewCFG1 = case Dirty of true -> NewBlock = hipe_bb:code_update(Block, NewCode0), hipe_x86_cfg:bb_add(CFG, Lbl, NewBlock); _ -> CFG end, {NewCFG3, NewBlockMap2} = do_blocks(Lbl, Succ, NewCFG1, Liveness, NewMap, NewBlockMap1), do_blocks(Pred, Lbls, NewCFG3, Liveness, Map, NewBlockMap2); {value, fail} -> %% Don't have to follow this trace any longer. do_blocks(Pred,Lbls, CFG, Liveness, Map, BlockMap); {value, ExistingMap} -> %% This block belongs to a trace already handled. %% The Map coming in must be identical to the one used %% when the block was processed. if ExistingMap =:= Map -> do_blocks(Pred, Lbls, CFG, Liveness, Map, BlockMap); true -> NewCFG = do_shuffle(Pred, Lbl, CFG, Map, ExistingMap), do_blocks(Pred, Lbls, NewCFG, Liveness, Map, BlockMap) end end; do_blocks(_Pred, [], CFG, _Liveness, _Map, BlockMap) -> {CFG, BlockMap}. do_block(Ins, LiveOut, Map, BlockMap) -> do_block(Ins, LiveOut, Map, BlockMap, false). do_block([I|Is], LiveOut, Map, BlockMap, Dirty) -> case handle_insn(I) of false -> {NewCode, NewMap, NewBlockMap, NewDirty} = do_block(Is, LiveOut, Map, BlockMap, Dirty), {NewCode++[I], NewMap, NewBlockMap, NewDirty}; true -> Def = ordsets:from_list(?HIPE_X86_DEFUSE:insn_def(I)), Use = ordsets:from_list(?HIPE_X86_DEFUSE:insn_use(I)), NewLiveOut = ordsets:filter(fun(X) -> is_fp(X) end, ordsets:union(ordsets:subtract(LiveOut, Def), Use)), {NewCode, NewMap, NewBlockMap, NewDirty} = do_block(Is, NewLiveOut, Map, BlockMap, Dirty), {NewI, NewMap1, NewBlockMap1} = do_insn(I, LiveOut, NewMap, NewBlockMap), NewDirty1 = if NewDirty =:= true -> true; NewI =:= [I] -> false; true -> true end, {NewCode++NewI, NewMap1, NewBlockMap1, NewDirty1} end; do_block([], LiveOut, Map, BlockMap, Dirty) -> case [X || X <- Map, not lists:member(X, LiveOut)] of [] -> {[], Map, BlockMap, Dirty}; Pop -> {PopIns, NewMap} = pop_dead(Pop, Map), {PopIns, NewMap, BlockMap, true} end. do_shuffle(Pred, Lbl, CFG, OldMap, NewMap) -> %% First make sure both maps have the same members. Push = NewMap -- OldMap, Pop = OldMap -- NewMap, {PopInsn, OldMap0} = pop_dead(Pop, OldMap), {PushInsn, OldMap1} = case Push of []-> {[], OldMap0}; _-> push_list(lists:reverse(Push), OldMap0) end, Code = if OldMap1 =:= NewMap -> %% It was enough to push and pop. PopInsn ++ PushInsn ++ [hipe_x86:mk_jmp_label(Lbl)]; true -> %% Shuffle the positions so the maps match Cycles = find_swap_cycles(OldMap1, NewMap), SwitchInsns = do_switching(Cycles), PopInsn ++ PushInsn ++ SwitchInsns ++ [hipe_x86:mk_jmp_label(Lbl)] end, %% Update the CFG. NewLabel = hipe_gensym:get_next_label(x86), NewCFG1 = hipe_x86_cfg:bb_add(CFG, NewLabel, hipe_bb:mk_bb(Code)), OldPred = hipe_x86_cfg:bb(NewCFG1, Pred), PredCode = hipe_bb:code(OldPred), NewLast = redirect(lists:last(PredCode), Lbl,NewLabel), NewPredCode = butlast(PredCode) ++ [NewLast], NewPredBB = hipe_bb:code_update(OldPred, NewPredCode), hipe_x86_cfg:bb_add(NewCFG1, Pred, NewPredBB). find_swap_cycles(OldMap, NewMap) -> Moves = [get_pos(X, NewMap, 1) || X <- OldMap], find_swap_cycles(OldMap, Moves, lists:seq(1, length(OldMap)), []). find_swap_cycles(OldMap, Moves, NotHandled, Cycles) -> if NotHandled =:= [] -> Cycles; true -> Cycle = find_cycle(Moves, [hd(NotHandled)]), NewNotHandled = NotHandled -- Cycle, case lists:member(1, Cycle) of true -> %% The cycle that contains the first element on the stack %% must be processed last. NewCycle = format_cycle(Cycle), find_swap_cycles(OldMap, Moves, NewNotHandled, Cycles ++ [NewCycle]); _ -> NewCycle = format_cycle(Cycle), find_swap_cycles(OldMap, Moves, NewNotHandled, [NewCycle|Cycles]) end end. find_cycle(Moves, Cycle) -> To = lists:nth(lists:last(Cycle), Moves), if To =:= hd(Cycle) -> Cycle; true -> find_cycle(Moves, Cycle ++ [To]) end. format_cycle(C) -> %% The position numbers start with 1 - should start with 0. %% If position 0 is in the cycle it will be permuted until %% the 0 is first and then remove it. %% Otherwise the first element is also added last. NewCycle = [X - 1 || X <- C], case lists:member(0, NewCycle) of true -> format_cycle(NewCycle, []); _ -> NewCycle ++ [hd(NewCycle)] end. format_cycle([H|T], NewCycle) -> case H of 0 -> T ++ NewCycle; _ -> format_cycle(T, NewCycle ++ [H]) end. do_switching(Cycles) -> do_switching(Cycles, []). do_switching([C|Cycles], Insns) -> NewInsns = Insns ++ [hipe_x86:mk_fp_unop(fxch, mk_st(X)) || X <- C], do_switching(Cycles, NewInsns); do_switching([], Insns) -> Insns. redirect(Insn, OldLbl, NewLbl) -> case Insn of #pseudo_call{contlab = ContLab, sdesc = SDesc} -> #x86_sdesc{exnlab = ExnLab} = SDesc, if ContLab =:= OldLbl -> Insn#pseudo_call{contlab = NewLbl}; ExnLab =:= OldLbl -> Insn#pseudo_call{sdesc = SDesc#x86_sdesc{exnlab = NewLbl}} end; _ -> hipe_x86_cfg:redirect_jmp(Insn, OldLbl, NewLbl) end. do_insn(I, LiveOut, Map, BlockMap) -> case I of #pseudo_call{'fun' = Fun, contlab = ContLab} -> case Fun of %% We don't want to spill anything if an exception has been thrown. {_, 'handle_fp_exception'} -> NewBlockMap = case gb_trees:lookup(ContLab, BlockMap) of {value, fail} -> BlockMap; {value, _} -> gb_trees:update(ContLab, fail, BlockMap); none -> gb_trees:insert(ContLab, fail, BlockMap) end, {[I], [], NewBlockMap}; _ -> {pop_all(Map)++[I],[],BlockMap} end; #fp_unop{op = 'fwait'} -> Store = pseudo_pop(Map), {Store ++ [I], Map, BlockMap}; #fp_unop{} -> {NewI, NewMap} = do_fp_unop(I, LiveOut, Map), {NewI, NewMap, BlockMap}; #fp_binop{} -> {NewI, NewMap} = do_fp_binop(I, LiveOut, Map), {NewI, NewMap, BlockMap}; #fmove{src = Src, dst = Dst} -> if Src =:= Dst -> %% Don't need to keep this instruction! %% However, we may need to pop from the stack. case is_liveOut(Src, LiveOut) of true-> {[], Map, BlockMap}; false -> {SwitchInsn, NewMap0} = switch_first(Dst, Map), NewMap = pop(NewMap0), {SwitchInsn++pop_insn(), NewMap, BlockMap} end; true -> {NewI, NewMap} = do_fmove(Src, Dst, LiveOut, Map), {NewI, NewMap, BlockMap} end; _ -> {[I], Map, BlockMap} end. do_fmove(Src, Dst = #x86_mem{}, LiveOut, Map) -> %% Storing a float from the stack into memory. {SwitchInsn, NewMap0} = switch_first(Src, Map), case is_liveOut(Src, LiveOut) of true -> {SwitchInsn ++ [hipe_x86:mk_fp_unop(fst, Dst)], NewMap0}; _ -> NewMap1 = pop(NewMap0), {SwitchInsn ++ [hipe_x86:mk_fp_unop(fstp, Dst)], NewMap1} end; do_fmove(Src = #x86_mem{}, Dst, _LiveOut, Map) -> %% Pushing a float into the stack. case in_map(Dst, Map) of true -> ?EXIT({loadingExistingFpVariable,{Src,Dst}}); _ -> ok end, {PushOp, [_|NewMap0]} = push(Src, Map), %% We want Dst in the map rather than Src. NewMap = [Dst|NewMap0], {PushOp, NewMap}; do_fmove(Src, Dst, LiveOut, Map) -> %% Copying a float that either is spilled or is on the fp stack, %% or converting a fixnum in a temp to a float on the fp stack. case in_map(Dst, Map) of true -> ?EXIT({copyingToExistingFpVariable,{Src,Dst}}); _ -> ok end, IsConv = case Src of #x86_temp{type = Type} -> Type =/= 'double'; _ -> false end, case IsConv of true -> do_conv(Src, Dst, Map); _ -> %% Copying. case {is_liveOut(Src, LiveOut), in_map(Src, Map)} of {false, true} -> %% Just remap Dst to Src {Head, [_|T]} = lists:splitwith(fun(X) -> X =/= Src end, Map), {[], Head ++ [Dst|T]}; _ -> {PushOp, [_|NewMap0]} = push(Src, Map), %% We want Dst in the map rather than Src. NewMap = [Dst|NewMap0], {PushOp, NewMap} end end. do_conv(Src = #x86_temp{reg = Reg}, Dst, Map) -> %% Converting. Src must not be a register, so we %% might have to put it into memory in between. {Move, NewSrc} = case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of true -> Temp = hipe_x86:mk_new_temp('untagged'), {[hipe_x86:mk_move(Src,Temp)], Temp}; _ -> {[], Src} end, {PushOp, [_|NewMap0]} = push(NewSrc, Map), %% We want Dst in the map rather than NewSrc. NewMap = [Dst|NewMap0], case length(PushOp) of 1 -> %% No popping of memory object on fpstack {Move ++ [hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap}; _ -> %% H contains pop instructions. Must be kept! Head = butlast(PushOp), {Move ++ Head ++ [hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap} end. do_fp_unop(I = #fp_unop{arg = Arg, op = fchs}, Liveout, Map) -> %% This is fchs, the only operation without a %% popping version. Needs special handling. case is_liveOut(Arg, Liveout) of true -> {SwitchIns, NewMap} = switch_first(Arg, Map), {SwitchIns ++ [I#fp_unop{arg = []}], NewMap}; false -> %% Don't need to keep this instruction! %% However, we may need to pop Src from the stack. case in_map(Arg, Map) of true -> {SwitchInsn, NewMap0} = switch_first(Arg, Map), NewMap = pop(NewMap0), {SwitchInsn ++ pop_insn(), NewMap}; _ -> {[],Map} end end. do_fp_binop(#fp_binop{src = Src, dst = Dst, op = Op}, LiveOut, Map) -> case {is_liveOut(Src, LiveOut), is_liveOut(Dst, LiveOut)} of {true, true} -> keep_both(Op, Src, Dst, Map); {true, false} -> keep_src(Op, Src, Dst, Map); {false, true} -> keep_dst(Op, Src, Dst, Map); {false, false} -> %% Both Dst and Src are popped. keep_none(Op, Src, Dst, Map) end. keep_both(Op, Src, Dst, Map) -> %% Keep both Dst and Src if it is there. {SwitchInsn, NewMap} = switch_first(Dst, Map), NewSrc = get_new_opnd(Src, NewMap), Insn = format_fp_binop(Op, NewSrc, mk_st(0)), {SwitchInsn++Insn, NewMap}. keep_src(Op, Src, Dst, Map) -> %% Pop Dst but keep Src in stack if it is there. {SwitchInsn, NewMap0} = switch_first(Dst, Map), NewSrc = get_new_opnd(Src, NewMap0), NewMap = pop(NewMap0), Insn = format_fp_binop(Op, NewSrc, mk_st(0)), {SwitchInsn ++ Insn ++ pop_insn(), NewMap}. keep_dst(Op, Src, Dst, Map) -> %% Keep Dst but pop Src. %% Dst must be in stack. DstInMap = in_map(Dst, Map), SrcInMap = in_map(Src, Map), case SrcInMap of true -> case DstInMap of true -> %% Src must be popped. If Dst is on top of the stack we can %% alter the operation rather than shuffle the stack. {SwitchInsn, Insn, NewMap} = if hd(Map) =:= Dst -> NewOp = mk_op_pop(reverse_op(Op)), NewDst = get_new_opnd(Src, Map), TmpMap = lists:map(fun(X) -> if X =:= Src -> Dst; true -> X end end, Map), {[], format_fp_binop(NewOp, mk_st(0), NewDst), pop(TmpMap)}; true -> {SwitchInsn1, NewMap0} = switch_first(Src, Map), NewDst = get_new_opnd(Dst,NewMap0), NewOp = mk_op_pop(Op), {SwitchInsn1,format_fp_binop(NewOp, mk_st(0), NewDst), pop(NewMap0)} end, {SwitchInsn ++ Insn, NewMap}; _ -> %% Src is on the stack, but Dst isn't. Use memory command to avoid %% unnecessary loading instructions. {SwitchInsn, NewMap0} = switch_first(Src, Map), NewOp = reverse_op(Op), NewMap = [Dst] ++ tl(NewMap0), Insn = format_fp_binop(NewOp, Dst, mk_st(0)), {SwitchInsn ++ Insn, NewMap} end; _ -> %% Src isn't in the map so it doesn't have to be popped. {SwitchInsn, NewMap} = switch_first(Dst, Map), {SwitchInsn ++ [#fp_unop{arg = Src, op = Op}], NewMap} end. keep_none(Op, Src, Dst, Map) -> %% Dst must be on stack. {PushInsn, NewMap0} = case in_map(Dst, Map) of true -> {[], Map}; _ -> push(Dst, Map) end, case in_map(Src, NewMap0) of true -> %% Src must be popped. {SwitchInsn1, NewMap1} = switch_first(Src, NewMap0), NewOp = mk_op_pop(Op), NewDst = get_new_opnd(Dst,NewMap1), NewMap2 = pop(NewMap1), %% Then Dst has to be popped. {PopInsn, NewMap} = pop_member(Dst, NewMap2), Insn = format_fp_binop(NewOp, mk_st(0), NewDst), {PushInsn ++ SwitchInsn1 ++ Insn ++ PopInsn, NewMap}; _ -> %% Src isn't in the map so it doesn't have to be popped. {SwitchInsn, NewMap1} = switch_first(Dst, NewMap0), NewMap = pop(NewMap1), {SwitchInsn ++ [#fp_unop{arg = Src, op = Op}] ++ pop_insn(), NewMap} end. format_fp_binop(Op, Src = #x86_temp{}, Dst = #x86_fpreg{reg = Reg}) -> %% Handle that st(0) is sometimes implicit. if Reg =:= 0 -> [hipe_x86:mk_fp_unop(Op, Src)]; true -> [hipe_x86:mk_fp_binop(Op, Src, Dst)] end; format_fp_binop(Op, Src, Dst) -> [hipe_x86:mk_fp_binop(Op, Src, Dst)]. in_map(X, Map) -> lists:member(X, Map). push_list(L, Map) -> push_list(L, Map, []). push_list([H|T], Map, Acc) -> {Insn, NewMap} = push(H,Map), push_list(T, NewMap, Acc++Insn); push_list([], Map, Acc) -> {Acc, Map}. push(X, Map0) -> {PopInsn, Map} = if length(Map0) > 7 -> pop_a_temp(Map0); true -> {[], Map0} end, NewX = get_new_opnd(X,Map), NewMap = [X | Map], PushOp = [hipe_x86:mk_fp_unop(fld, NewX)], {PopInsn ++ PushOp, NewMap}. pop([_|Map]) -> Map. pop_insn() -> [hipe_x86:mk_fp_unop('fstp',mk_st(0))]. pop_dead(Dead, Map) -> Dead0 = [X || X <- Map, lists:member(X,Dead)], pop_dead(Dead0, Map, []). pop_dead([D|Dead], Map, Code) -> {I, NewMap0} = switch_first(D, Map), NewMap = pop(NewMap0), Store = case D of #x86_temp{} -> [hipe_x86:mk_fp_unop('fstp', D)]; _ -> pop_insn() end, pop_dead(Dead, NewMap, Code++I++Store); pop_dead([], Map, Code) -> {Code,Map}. pop_all(Map) -> {Code, _} = pop_dead(Map, Map), Code. pop_member(Member, Map) -> {Head,[_|T]} = lists:splitwith(fun(X)-> X =/= Member end, Map), {[hipe_x86:mk_fp_unop('fstp', mk_st(get_pos(Member, Map, 0)))], Head++T}. pop_a_temp(Map) -> Temp = find_a_temp(Map), {SwitchInsn, NewMap0} = switch_first(Temp, Map), NewMap = pop(NewMap0), {SwitchInsn ++ [hipe_x86:mk_fp_unop('fstp', Temp)], NewMap}. find_a_temp([H = #x86_temp{}|_]) -> H; find_a_temp([_|T]) -> find_a_temp(T); find_a_temp([]) -> ?EXIT({noTempOnFPStack,{}}). switch_first(X, Map = [H|_]) -> Pos = get_pos(X, Map, 0), case Pos of 0 -> {[], Map}; notFound -> push(X, Map); _ -> {[_|Head], [_|Tail]} = lists:splitwith(fun(Y)-> Y =/= X end, Map), NewMap = [X|Head] ++ [H|Tail], Ins = hipe_x86:mk_fp_unop(fxch, mk_st(Pos)), {[Ins], NewMap} end; switch_first(X, Map) -> push(X, Map). get_pos(X, [H|T], Pos) -> if X =:= H -> Pos; true -> get_pos(X, T, Pos+1) end; get_pos(_, [], _) -> notFound. get_new_opnd(X, Map) -> I = get_pos(X, Map, 0), case I of notFound -> %% The operand is probably a spilled float. X; _ -> mk_st(I) end. is_fp(#x86_fpreg{}) -> true; is_fp(#x86_mem{type = Type}) -> Type =:= 'double'; is_fp(#x86_temp{type = Type}) -> Type =:= 'double'. handle_insn(I) -> case I of #fmove{} -> true; #fp_unop{} -> true; #fp_binop{} -> true; #pseudo_call{} ->true; %% #ret{} -> true; _ -> false end. is_liveOut(X, LiveOut) -> ordsets:is_element(X, LiveOut). mk_st(X) -> hipe_x86:mk_fpreg(X, false). reverse_op(Op) -> case Op of 'fsub' -> 'fsubr'; 'fdiv' -> 'fdivr'; 'fsubr'-> 'fsub'; 'fdivr' -> 'fdiv'; _ -> Op end. mk_op_pop(Op) -> case Op of 'fadd'-> 'faddp'; 'fdiv' -> 'fdivp'; 'fdivr' -> 'fdivrp'; 'fmul' -> 'fmulp'; 'fsub' -> 'fsubp'; 'fsubr' -> 'fsubrp'; _ -> ?EXIT({operandHasNoPopVariant,{Op}}) end. butlast([X|Xs]) -> butlast(Xs,X). butlast([],_) -> []; butlast([X|Xs],Y) -> [Y|butlast(Xs,X)]. %%pp_insn(Op, Src, Dst) -> %% pp([hipe_x86:mk_fp_binop(Op, Src, Dst)]). %%pp([I|Ins]) -> %% hipe_x86_pp:pp_insn(I), %% pp(Ins); %%pp([]) -> %% []. pseudo_pop(Map) when length(Map) > 0 -> Dst = hipe_x86:mk_new_temp('double'), pseudo_pop(Dst, length(Map), []); pseudo_pop(_) -> []. pseudo_pop(Dst, St, Acc) when St > 1 -> %% Store all members of the stack to a single temporary to force %% any floating point overflow exceptions to occur even though we %% don't have overflow for the extended double precision in the x87. pseudo_pop(Dst, St-1, [hipe_x86:mk_fp_unop('fxch', mk_st(St-1)), hipe_x86:mk_fp_unop('fst', Dst), hipe_x86:mk_fp_unop('fxch', mk_st(St-1)) |Acc]); pseudo_pop(Dst, _St, Acc) -> [hipe_x86:mk_fp_unop('fst', Dst)|Acc].