%%% -*- erlang-indent-level: 2 -*-
%%%
%%% Licensed under the Apache License, Version 2.0 (the "License");
%%% you may not use this file except in compliance with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%%
%%%----------------------------------------------------------------------
%%% File    : hipe_x86_postpass.erl
%%% Author  : Christoffer Vikström <chvi3471@student.uu.se>
%%% Purpose : Contain postpass optimisations for x86-assembler code.
%%% Created : 5 Aug 2003 by Christoffer Vikström <chvi3471@student.uu.se>
%%%----------------------------------------------------------------------

-ifndef(HIPE_X86_POSTPASS).
-define(HIPE_X86_POSTPASS, hipe_x86_postpass).
-endif.

-module(?HIPE_X86_POSTPASS).
-export([postpass/2]).
-include("../x86/hipe_x86.hrl").

%%>----------------------------------------------------------------------<
%  Procedure : postpass/2
%  Purpose   : Function that performs a nr of postpass optimizations on
%              the hipe x86-assembler code before it is encoded and loaded.
%%>----------------------------------------------------------------------<
postpass(#defun{code=Code0}=Defun, Options) ->
  Code1 = pseudo_insn_expansion(Code0),
  Code2 = case proplists:get_bool(peephole, Options) of
	    true  -> peephole_optimization(Code1);
	    false -> Code1
	  end,
  Code3 = trivial_goto_elimination(Code2),
  Defun#defun{code=Code3}.


%%>----------------------------------------------------------------------<
%  Procedure : peep/1
%  Purpose   : Function that does peephole optimizations. It works by 
%              moving a window over the code and looking at a sequence of 
%              a few instructions. Replaces long sequences of instructions
%              with shorter ones and removes unnecesary ones.
%  Arguments : Insns   - List of pseudo x86-assembler records.
%              Res     - Returned list of pseudo x86-assembler records. 
%                        Kept reversed, until it is returned.
%  Return    : An optimized list of pseudo x86-assembler records with 
%              (hopefully) fewer or faster instructions.
%%>----------------------------------------------------------------------< 
peephole_optimization(Insns) -> 
  peep(Insns, [], []).


%% MoveSelf related peep-opts
%% ------------------------------
peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) ->
    peep(Insns, Res, [moveSelf1|Lst]);
peep([I=#fmove{src=Src, dst=Dst}, 
      #fmove{src=Dst, dst=Src} | Insns], Res,Lst) -> 
    peep(Insns, [I|Res], [moveSelf2|Lst]);
peep([#movsx{src=Src, dst=Src} | Insns], Res,Lst) -> 
    peep(Insns, Res, [moveSelf3|Lst]);
peep([I=#movsx{src=Src, dst=Dst}, 
      #movsx{src=Dst, dst=Src} | Insns], Res,Lst) -> 
    peep(Insns, [I|Res], [moveSelf4|Lst]);
peep([#movzx{src=Src, dst=Src} | Insns], Res,Lst) -> 
    peep(Insns, Res, [moveSelf5|Lst]);
peep([I=#movzx{src=Src, dst=Dst}, 
      #movzx{src=Dst, dst=Src} | Insns], Res,Lst) ->
    peep(Insns, [I|Res], [moveSelf6|Lst]);
peep([#cmovcc{src=Src, dst=Src} | Insns], Res,Lst) -> 
    peep(Insns, Res, [moveSelf7|Lst]);
peep([I=#cmovcc{src=Src, dst=Dst}, 
      #cmovcc{src=Dst, dst=Src}|Insns], Res,Lst) -> 
    peep(Insns, [I|Res], [moveSelf8|Lst]);
peep([#move{src=#x86_temp{reg=X}, 
	    dst=#x86_temp{reg=X}} | Insns], Res,Lst) -> 
    peep(Insns, Res, [moveSelf9|Lst]);
peep([I=#move{src=#x86_temp{reg=Src}, dst=#x86_temp{reg=Dst}}, 
      #move{src=#x86_temp{reg=Dst}, dst=#x86_temp{reg=Src}} | Insns], Res,Lst) -> 
    peep(Insns, [I|Res], [moveSelf0|Lst]);


%% ElimBinALMDouble
%% ----------------
peep([Move=#move{src=Src, dst=Dst}, Alu=#alu{src=Src, dst=Dst}|Insns], Res, Lst)
  when not is_record(Dst, x86_mem) ->
  peep([Alu#alu{src=Dst}|Insns], [Move|Res], [elimBinALMDouble|Lst]);


%% ElimFBinDouble
%% --------------
peep([Move=#fmove{src=Src, dst=Dst}, 
      BinOp=#fp_binop{src=Src, dst=Dst}|Insns], Res, Lst) ->
  peep([BinOp#fp_binop{src=Dst}|Insns], [Move|Res], [elimFBinDouble|Lst]);


%% CommuteBinALMD
%% --------------
peep([#move{src=Src1, dst=Dst}, 
      #alu{aluop=Op,src=Src2,dst=Dst}|Insns], Res, Lst)
  when (Src1 =:= #x86_imm{}) and (Src2 =/= #x86_imm{}) and 
       ((Op =:= 'add') or (Op =:= 'and') or (Op =:= 'or') or (Op =:= 'xor'))  ->
  peep(Insns, [#alu{aluop=Op,src=Src1,dst=Dst},
	       #move{src=Src2, dst=Dst}|Res], 
       [commuteBinALMD|Lst]);


%% ElimCmp0
%% --------
peep([#cmp{src=#x86_imm{value=0}, dst=Dst=#x86_temp{}}|Insns],Res,Lst) ->
  %% TEST leaves the adjust flag undefined, whereas CMP sets it properly (in
  %% this case to 0). However, since HiPE does not use any instructions that
  %% read the adjust flag, we can do this transform safely.
  peep(Insns, [#test{src=Dst, dst=Dst} | Res], [elimCmp0_1|Lst]);
peep([#cmp{src=Src=#x86_temp{}, dst=#x86_imm{value=0}},
      J=#jcc{cc=Cond}|Insns],Res,Lst)
  when Cond =:= 'e'; Cond =:= 'ne' -> % We're commuting the comparison
  peep(Insns, [J, #test{src=Src, dst=Src} | Res], [elimCmp0_2|Lst]);

%% ElimCmpTest
%% -----------
peep([I|Insns],Res,Lst) when (I =:= #cmp{}) or (I =:= #test{}) -> 
    case check(Insns) of
	#jcc{} ->
	    peep(Insns, [I|Res], Lst);
	#jmp_fun{} ->
	    peep(Insns, [I|Res], Lst);
	#jmp_label{} ->
	    peep(Insns, [I|Res], Lst);
	#jmp_switch{} ->
	    peep(Insns, [I|Res], Lst);
	#cmovcc{} ->
	    peep(Insns, [I|Res], Lst);
	#ret{} ->
	    peep(Insns, [I|Res], Lst);
	_ ->
	    peep(Insns, Res, [elimCmpTest|Lst])
    end;


%% ElimPushPop
%% -----------
peep([#push{src=Opr}, #pop{dst=Opr} | Insns], Res, Lst) ->
    peep(Insns, Res, [elimPushPop|Lst]);


% %% ElimIFF
% %% -------
peep([#jcc{label=Lab}, I=#label{label=Lab}|Insns], Res, Lst) ->
     peep(Insns, [I, #jmp_label{label=Lab}|Res], [elimIFF|Lst]);


%% ElimSet0
%% --------
peep([#move{src=#x86_imm{value=0},dst=Dst=#x86_temp{}}|Insns],Res,Lst) ->
  peep(Insns, [#alu{aluop='xor', src=Dst, dst=Dst}|Res], [elimSet0|Lst]);    

%% ElimMDPow2
%% ----------
peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) ->
    {IsLog2, Size, Sign} = log2(Val),
    case ((Op =:= imul) or (Op =:= idiv)) and IsLog2 of
	true ->
	    Sh = case Sign of positive -> 'bsl'; negative -> 'bsr' end,
	    peep(Insns, 
		 [#shift{shiftop=Sh, src=#x86_imm{value=Size}, dst=Dst}|Res], 
		 [elimMDPow2|Lst]);
	false ->
	    peep(Insns, [B|Res], Lst)
    end;

%% LeaToAdd
%% This rule transforms lea into add when the destination is the same as one of
%% the operands. Sound because lea is never used where the condition codes are
%% live (and would be clobbered by add).
%% ----------
peep([#lea{mem=#x86_mem{base=#x86_temp{reg=DstR},off=Src},
	   temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) ->
     peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]);
peep([#lea{mem=#x86_mem{base=Src,off=#x86_temp{reg=DstR}},
	   temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) ->
     peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]);

%% SubToDec
%% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which
%% changes reduction counter tests to use decl instead of subl.
%% However, on Athlon64 this leads to a small but measurable decrease
%% in performance. The use of dec is also not recommended on P4, so
%% this transformation is disabled.
%% peep([#alu{aluop='sub',src=#x86_imm{value=1},dst=Dst},J=#jcc{cc='l'}|Insns], Res, Lst) ->
%%   peep(Insns, [J, #dec{dst=Dst} | Res], [subToDec|Lst]);

%% Standard list recursion clause
%% ------------------------------
peep([I | Insns], Res, Lst) ->
     peep(Insns, [I|Res], Lst);
peep([], Res, _Lst) ->
    lists:reverse(Res). 

%% Simple goto elimination
%% -----------------------
trivial_goto_elimination(Insns) -> goto_elim(Insns, []).

goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) ->
  goto_elim([I|Insns], Res);
goto_elim([#jcc{cc=CC, label=Label} = IJCC,
	   #jmp_label{label=BranchTgt},
	   #label{label=Label} = ILBL|Insns], Res) ->
  goto_elim([IJCC#jcc{cc=hipe_x86:neg_cc(CC), label=BranchTgt},
	     ILBL|Insns], Res);
goto_elim([I | Insns], Res) ->
  goto_elim(Insns, [I|Res]);
goto_elim([], Res) ->
  lists:reverse(Res). 


%%>----------------------------------------------------------------------<
%%  Procedure : expand/1
%%  Purpose   : Expands pseudo instructions.
%%  Arguments : Insns - An x86-instruction list.
%%  Return    : An expanded instruction list.
%%  Notes     : 
%%>----------------------------------------------------------------------<
pseudo_insn_expansion(Insns) -> expand(Insns, []).
expand([I|Tail], Res) ->
    case I of
	#pseudo_jcc{cc=Cc,true_label=TrueLab,false_label=FalseLab} ->
	    expand(Tail, [hipe_x86:mk_jmp_label(FalseLab),
			  hipe_x86:mk_jcc(Cc, TrueLab) | Res]);
	#pseudo_tailcall_prepare{} ->
	    expand(Tail, Res);
	#pseudo_call{'fun'=Fun,sdesc=SDesc,contlab=ContLab,linkage=Linkage} ->
	    expand(Tail, [hipe_x86:mk_jmp_label(ContLab),
			  hipe_x86:mk_call(Fun, SDesc, Linkage) | Res]);
	_ ->
	    expand(Tail, [I|Res])
    end;
expand([], Res) -> lists:reverse(Res).

%% Log2 function
%% -------------
%% Used by ElimMDPow2 clause of peep(..)
log2(Nr) -> log2(Nr, 0).
log2(0, _) -> {false, 0, positive};
log2(Nr, I) ->
    case (Nr band 1) =:= 1 of
	true ->
	    case Nr of
		1 ->
		    {true, I, positive};
		-1 ->
		    {true, I, negative};
		_ ->
		    {false, 0, positive}
	    end;
	false ->
	    log2((Nr bsr 1), I+1)
    end.

%% Skips through all comments and move instructions and returns the next one
%% -------------------------------------------------------------------------
%% Used by ElimCmpTest above.
check([I|Ins]) ->
    case I of
	#comment{} ->
	    check(Ins);
	#move{} ->
	    check(Ins);
	#fmove{} ->
	    check(Ins);
	#movsx{} ->
	    check(Ins);
	#movzx{} ->
	    check(Ins);
	OtherI ->
	    OtherI
    end.