11 files changed, 196 insertions, 54 deletions
diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl
index 4c8c98551c..851b7da2dd 100644
--- a/lib/hipe/x86/hipe_rtl_to_x86.erl
+++ b/lib/hipe/x86/hipe_rtl_to_x86.erl
@@ -91,26 +91,31 @@ conv_insn(I, Map, Data) ->
     #alub{} ->
       %% dst = src1 op src2; if COND goto label
       BinOp = conv_binop(hipe_rtl:alub_op(I)),
-      {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
-      {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
-      {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
+      {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map),
+      {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0),
       Cc = conv_cond(hipe_rtl:alub_cond(I)),
-      I1 = [hipe_x86:mk_pseudo_jcc(Cc,
-				   hipe_rtl:alub_true_label(I),
-				   hipe_rtl:alub_false_label(I),
-				   hipe_rtl:alub_pred(I))],
-      I2 = conv_alu(Dst, Src1, BinOp, Src2, I1),
-      {FixSrc1++FixSrc2++I2, Map2, Data};
-    #branch{} ->
-      %% <unused> = src1 - src2; if COND goto label
-      {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
-      {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
-      Cc = conv_cond(hipe_rtl:branch_cond(I)),
-      I2 = conv_branch(Src1, Cc, Src2,
-		       hipe_rtl:branch_true_label(I),
-		       hipe_rtl:branch_false_label(I),
-		       hipe_rtl:branch_pred(I)),
-      {FixSrc1++FixSrc2++I2, Map1, Data};
+      BranchOp = conv_branchop(BinOp),
+      HasDst = hipe_rtl:alub_has_dst(I),
+      {I2, Map3} =
+	case (not HasDst) andalso BranchOp =/= none of
+	  true ->
+	    {conv_branch(Src1, BranchOp, Src2, Cc,
+			 hipe_rtl:alub_true_label(I),
+			 hipe_rtl:alub_false_label(I),
+			 hipe_rtl:alub_pred(I)), Map1};
+	  false ->
+	    {Dst, Map2} =
+	      case HasDst of
+		false -> {new_untagged_temp(), Map1};
+		true -> conv_dst(hipe_rtl:alub_dst(I), Map1)
+	      end,
+	    I1 = [hipe_x86:mk_pseudo_jcc(Cc,
+					 hipe_rtl:alub_true_label(I),
+					 hipe_rtl:alub_false_label(I),
+					 hipe_rtl:alub_pred(I))],
+	    {conv_alu(Dst, Src1, BinOp, Src2, I1), Map2}
+	end,
+      {FixSrc1++FixSrc2++I2, Map3, Data};
     #call{} ->
       %%	push <arg1>
       %%	...
@@ -252,7 +257,9 @@ conv_insn(I, Map, Data) ->
 
 conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) ->
   case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2))
-    andalso (hipe_x86:is_temp(Src1) orelse hipe_x86:is_temp(Src2))
+    %% We could use orelse instead of xor here to generate lea T1(T2), T3, but
+    %% they seem to move coalesce so well that move+add is better for them.
+    andalso (hipe_x86:is_temp(Src1) xor hipe_x86:is_temp(Src2))
   of
     false -> conv_alu(Dst, Src1, 'add', Src2, Tail);
     true -> % Use LEA
@@ -263,6 +270,16 @@ conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) ->
 	    end,
       [hipe_x86:mk_lea(Mem, Dst) | Tail]
   end;
+conv_alu_nocc(Dst, Src1, 'sub', Src2, Tail) ->
+  %% "lea -Imm(Src1), Dst" needs an integer Imm whose negation fits a disp32.
+  case (not same_opnd(Dst, Src1)) andalso hipe_x86:is_temp(Src1)
+    andalso hipe_x86:is_imm(Src2) andalso hipe_x86:imm_value(Src2)
+  of
+    Val when is_integer(Val), Val > -16#80000000, Val =< 16#80000000 ->
+      Mem = hipe_x86:mk_mem(Src1, hipe_x86:mk_imm(-Val), typeof_dst(Dst)),
+      [hipe_x86:mk_lea(Mem, Dst) | Tail];
+    _ -> conv_alu(Dst, Src1, 'sub', Src2, Tail) % no LEA form: plain sub
+  end;
 conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) ->
   conv_alu(Dst, Src1, BinOp, Src2, Tail).
 
@@ -360,28 +377,41 @@ conv_shift(Dst, Src1, BinOp, Src2) ->
 %%% Finalise the conversion of a conditional branch operation, taking
 %%% care to not introduce more temps and moves than necessary.
 
-conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
+%% Map an ALU operator to the flag-only instruction used for branching, if any.
+conv_branchop(BinOp) ->
+  case BinOp of 'sub' -> 'cmp'; 'and' -> 'test'; _ -> none end.
+
+branchop_commutes('test') -> true; % operands may be swapped, cc unchanged
+branchop_commutes('cmp') -> false. % swapping operands requires commute_cc/1
+
+conv_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) -> % Op: cmp | test
   case hipe_x86:is_imm(Src1) of
     false ->
-      mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred);
+      mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred);
     true ->
       case hipe_x86:is_imm(Src2) of
 	false ->
-	  NewCc = commute_cc(Cc),
-	  mk_branch(Src2, NewCc, Src1, TrueLab, FalseLab, Pred);
+	  NewCc = case branchop_commutes(Op) of % operands are swapped below
+		    true -> Cc; % symmetric op: condition still valid
+		    false -> commute_cc(Cc) % adjust condition for the swap
+		  end,
+	  mk_branch(Src2, Op, Src1, NewCc, TrueLab, FalseLab, Pred);
 	true ->
 	  %% two immediates, let the optimiser clean it up
 	  Tmp = new_untagged_temp(),
 	  [hipe_x86:mk_move(Src1, Tmp) |
-	   mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)]
+	   mk_branch(Tmp, Op, Src2, Cc, TrueLab, FalseLab, Pred)]
       end
   end.
 
-mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
+mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) ->
   %% PRE: not(is_imm(Src1))
-  [hipe_x86:mk_cmp(Src2, Src1),
+  [mk_branchtest(Src1, Op, Src2), % flag-setting cmp or test selected by Op
    hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)].
 
+mk_branchtest(Lhs, test, Rhs) -> hipe_x86:mk_test(Rhs, Lhs); % (Src, Dst) order
+mk_branchtest(Lhs, cmp, Rhs) -> hipe_x86:mk_cmp(Rhs, Lhs). % (Src, Dst) order
+
 %%% Convert an RTL ALU or ALUB binary operator.
 
 conv_binop(BinOp) ->
diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl
index 33d7f77cf1..45bf1ad736 100644
--- a/lib/hipe/x86/hipe_x86.erl
+++ b/lib/hipe/x86/hipe_x86.erl
@@ -37,7 +37,7 @@
 	 mk_imm_from_addr/2,
 	 mk_imm_from_atom/1,
 	 is_imm/1,
-	 %% imm_value/1,
+	 imm_value/1,
 
 	 mk_mem/3,
 	 %% is_mem/1,
@@ -201,7 +201,7 @@
 	 shift_src/1,
 	 shift_dst/1,
 
-	 %% mk_test/2,
+	 mk_test/2,
 	 test_src/1,
 	 test_dst/1,
 
@@ -218,6 +218,10 @@
 	 %% highest_temp/1
 	]).
 
+%% Other utilities
+-export([neg_cc/1
+	]).
+
 %%%
 %%% Low-level accessors.
 %%%
@@ -241,7 +245,7 @@ mk_imm_from_addr(Addr, Type) ->
 mk_imm_from_atom(Atom) ->
     mk_imm(Atom).
 is_imm(X) -> case X of #x86_imm{} -> true; _ -> false end.
-%% imm_value(#x86_imm{value=Value}) -> Value.
+imm_value(#x86_imm{value=V}) -> V. % value field of an immediate operand
 
 mk_mem(Base, Off, Type) -> #x86_mem{base=Base, off=Off, type=Type}.
 %% is_mem(X) -> case X of #x86_mem{} -> true; _ -> false end.
@@ -305,7 +309,7 @@ mk_cmp(Src, Dst) -> #cmp{src=Src, dst=Dst}.
 cmp_src(#cmp{src=Src}) -> Src.
 cmp_dst(#cmp{dst=Dst}) -> Dst.
 
-%% mk_test(Src, Dst) -> #test{src=Src, dst=Dst}.
+mk_test(Src, Dst) -> #test{dst=Dst, src=Src}. % build a test instruction
 test_src(#test{src=Src}) -> Src.
 test_dst(#test{dst=Dst}) -> Dst.
 
diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl
index e21223a5b1..e692ff0ebb 100644
--- a/lib/hipe/x86/hipe_x86_assemble.erl
+++ b/lib/hipe/x86/hipe_x86_assemble.erl
@@ -599,10 +599,20 @@ temp_to_xmm(#x86_temp{reg=Reg}) ->
   {xmm, Reg}. 
 
 -ifdef(HIPE_AMD64).
+temp_to_rm8(#x86_temp{reg=R}) -> % 8-bit r/m operand for the temp's register
+  {rm8, ?HIPE_X86_ENCODE:rm_reg(R)}.
 temp_to_rm64(#x86_temp{reg=Reg}) ->
   {rm64, hipe_amd64_encode:rm_reg(Reg)}.
+-else.
+temp_to_rm8(#x86_temp{reg=R}) ->
+  true = ?HIPE_X86_ENCODE:reg_has_8bit(R), % crash if R has no 8-bit form
+  {rm8, ?HIPE_X86_ENCODE:rm_reg(R)}.
+temp_to_rm16(#x86_temp{reg=R}) -> % 16-bit r/m operand for the temp's register
+  {rm16, ?HIPE_X86_ENCODE:rm_reg(R)}.
 -endif.
 
+temp_to_rm32(#x86_temp{reg=R}) -> % 32-bit r/m operand for the temp's register
+  {rm32, ?HIPE_X86_ENCODE:rm_reg(R)}.
 temp_to_rmArch(#x86_temp{reg=Reg}) ->
   {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
 temp_to_rm64fp(#x86_temp{reg=Reg}) ->
@@ -878,15 +888,29 @@ resolve_alu_args(Src, Dst, Context) ->
 %%% test
 resolve_test_args(Src, Dst, Context) ->
   case Src of
-    #x86_imm{} -> % imm8 not allowed
-      {_ImmSize,ImmValue} = translate_imm(Src, Context, false),
-      NewDst =
-	case Dst of
-	  #x86_temp{reg=0} -> ?EAX;
-	  #x86_temp{} -> temp_to_rmArch(Dst);
-	  #x86_mem{} -> mem_to_rmArch(Dst)
-	end,
-      {NewDst, {imm32,ImmValue}};
+    %% Since we're using an 8-bit instruction, the immediate is not sign
+    %% extended. Thus, we can use immediates up to 255.
+    #x86_imm{value=ImmVal}
+      when is_integer(ImmVal), ImmVal >= 0, ImmVal =< 255 ->
+      Imm = {imm8, ImmVal},
+      case Dst of
+	#x86_temp{reg=0} -> {al, Imm};
+	#x86_temp{} -> resolve_test_imm8_reg(Imm, Dst);
+	#x86_mem{} -> {mem_to_rm8(Dst), Imm}
+      end;
+    #x86_imm{value=ImmVal} when is_integer(ImmVal), ImmVal >= 0 ->
+      {case Dst of
+	 #x86_temp{reg=0} -> eax;
+	 #x86_temp{} -> temp_to_rm32(Dst);
+	 #x86_mem{} -> mem_to_rm32(Dst)
+       end, {imm32, ImmVal}};
+    #x86_imm{} -> % Negative ImmVal; use word-sized instr, imm32
+      {_, ImmVal} = translate_imm(Src, Context, false),
+      {case Dst of
+	 #x86_temp{reg=0} -> ?EAX;
+	 #x86_temp{} -> temp_to_rmArch(Dst);
+	 #x86_mem{} -> mem_to_rmArch(Dst)
+       end, {imm32, ImmVal}};
     #x86_temp{} ->
       NewDst =
 	case Dst of
@@ -896,6 +920,18 @@ resolve_test_args(Src, Dst, Context) ->
       {NewDst, temp_to_regArch(Src)}
   end.
 
+-ifdef(HIPE_AMD64).
+resolve_test_imm8_reg(Imm8, Temp) -> {temp_to_rm8(Temp), Imm8}.
+-else.
+%% Use an 8-bit test when the register has a byte form; otherwise widen the
+%% operand and the immediate to 16 bits.
+resolve_test_imm8_reg({imm8, Val} = Imm8, #x86_temp{reg=R} = Temp) ->
+  HasByteForm = ?HIPE_X86_ENCODE:reg_has_8bit(R),
+  if HasByteForm -> {temp_to_rm8(Temp), Imm8};
+     true -> {temp_to_rm16(Temp), {imm16, Val}}
+  end.
+-endif.
+
 %%% shifts
 resolve_shift_args(Src, Dst, Context) ->
   RM32 =
diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl
index 4455def74e..ab26370a80 100644
--- a/lib/hipe/x86/hipe_x86_defuse.erl
+++ b/lib/hipe/x86/hipe_x86_defuse.erl
@@ -60,7 +60,7 @@ insn_def(I) ->
     #pseudo_tailcall_prepare{} -> tailcall_clobbered();
     #shift{dst=Dst} -> dst_def(Dst);
     %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label
-    %% pseudo_jcc, pseudo_tailcall, push, ret
+    %% pseudo_jcc, pseudo_tailcall, push, ret, test
     _ -> []
   end.
 
@@ -120,6 +120,7 @@ insn_use(I) ->
     #push{src=Src} -> addtemp(Src, []);
     #ret{} -> [hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?RV(), 'tagged')];
     #shift{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, []));
+    #test{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, []));
     %% comment, jcc, jmp_label, label, pseudo_jcc, pseudo_tailcall_prepare
     _ -> []
   end.
diff --git a/lib/hipe/x86/hipe_x86_encode.erl b/lib/hipe/x86/hipe_x86_encode.erl
index 3b7be86608..2d1663d0d6 100644
--- a/lib/hipe/x86/hipe_x86_encode.erl
+++ b/lib/hipe/x86/hipe_x86_encode.erl
@@ -65,6 +65,7 @@
 	 cc/1,
 	 % 8-bit registers
 	 %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0,
+	 reg_has_8bit/1,
 	 % 32-bit registers
 	 %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0,
 	 % operands
@@ -143,6 +144,8 @@ cc(g) -> ?CC_G.
 %% dh() -> ?DH.
 %% bh() -> ?BH.
 
+reg_has_8bit(Reg) -> ?BL >= Reg. % true for register numbers up to ?BL
+
 %%% 32-bit registers
 
 -define(EAX, 2#000).
@@ -700,8 +703,16 @@ shd_op_sizeof(Opnds) ->
 
 test_encode(Opnds) ->
     case Opnds of
+	{al, {imm8,Imm8}} ->
+	    [16#A8, Imm8];
+	{ax, {imm16,Imm16}} ->
+	    [?PFX_OPND, 16#A9 | le16(Imm16, [])];
 	{eax, {imm32,Imm32}} ->
 	    [16#A9 | le32(Imm32, [])];
+	{{rm8,RM8}, {imm8,Imm8}} ->
+	    [16#F6 | encode_rm(RM8, 2#000, [Imm8])];
+	{{rm16,RM16}, {imm16,Imm16}} ->
+	    [?PFX_OPND, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))];
 	{{rm32,RM32}, {imm32,Imm32}} ->
 	    [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))];
 	{{rm32,RM32}, {reg32,Reg32}} ->
@@ -710,8 +721,16 @@ test_encode(Opnds) ->
 
 test_sizeof(Opnds) ->
     case Opnds of
+	{al, {imm8,_}} ->
+	    1 + 1;
+	{ax, {imm16,_}} ->
+	    2 + 2;
 	{eax, {imm32,_}} ->
 	    1 + 4;
+	{{rm8,RM8}, {imm8,_}} ->
+	    1 + sizeof_rm(RM8) + 1;
+	{{rm16,RM16}, {imm16,_}} ->
+	    2 + sizeof_rm(RM16) + 2;
 	{{rm32,RM32}, {imm32,_}} ->
 	    1 + sizeof_rm(RM32) + 4;
 	{{rm32,RM32}, {reg32,_}} ->
@@ -1283,7 +1302,11 @@ dotest1(OS) ->
     t(OS,'sub',{RM32,Imm8}),
     t(OS,'sub',{RM32,Reg32}),
     t(OS,'sub',{Reg32,RM32}),
+    t(OS,'test',{al,Imm8}),
+    t(OS,'test',{ax,Imm16}),
     t(OS,'test',{eax,Imm32}),
+    t(OS,'test',{RM8,Imm8}),
+    t(OS,'test',{RM16,Imm16}),
     t(OS,'test',{RM32,Imm32}),
     t(OS,'test',{RM32,Reg32}),
     t(OS,'xor',{eax,Imm32}),
diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl
index fc782571bf..17253ad46f 100644
--- a/lib/hipe/x86/hipe_x86_frame.erl
+++ b/lib/hipe/x86/hipe_x86_frame.erl
@@ -116,6 +116,8 @@ do_insn(I, LiveOut, Context, FPoff) ->
       {do_ret(I, Context, FPoff), context_framesize(Context)};
     #shift{} ->
       {[do_shift(I, Context, FPoff)], FPoff};
+    #test{} ->
+      {[do_test(I, Context, FPoff)], FPoff};
     _ ->	% comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare
       {[I], FPoff}
   end.
@@ -188,6 +190,12 @@ do_shift(I, Context, FPoff) ->
   Dst = conv_opnd(Dst0, FPoff, Context),
   I#shift{src=Src,dst=Dst}.
 
+do_test(Test, Context, FPoff) -> % convert both operands via conv_opnd/3
+  #test{src=OldSrc, dst=OldDst} = Test,
+  NewSrc = conv_opnd(OldSrc, FPoff, Context),
+  NewDst = conv_opnd(OldDst, FPoff, Context),
+  Test#test{dst=NewDst, src=NewSrc}.
+
 conv_opnd(Opnd, FPoff, Context) ->
   case opnd_is_pseudo(Opnd) of
     false ->
diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl
index 4515822a34..f88a841cca 100644
--- a/lib/hipe/x86/hipe_x86_postpass.erl
+++ b/lib/hipe/x86/hipe_x86_postpass.erl
@@ -120,19 +120,15 @@ peep([#move{src=Src1, dst=Dst},
 
 %% ElimCmp0
 %% --------
-peep([C=#cmp{src=Src, dst=Dst},J=#jcc{cc=Cond, label=Lab}|Insns],Res,Lst) ->
-    case (((Src =:= #x86_imm{value=0}) or (Dst =:= #x86_imm{value=0})) and
-	  ((Cond =:= 'eq') or (Cond =:= 'neq'))) of
-	true ->
-	    Src2 = case Src of #x86_imm{value=0} -> Src; _ -> Dst end, 
-	    Cond2 = case Cond of 'eq' -> 'z'; 'neq' -> 'nz' end,
-	    Test = #test{src=Src2, dst=#x86_imm{value=0}},
-	    Jump = #jcc{cc=Cond2, label=Lab},
-	    peep(Insns, [Jump, Test|Res], [elimCmp0|Lst]);
-	_ ->
-	    peep(Insns, [J,C|Res], Lst)
-    end;
-
+peep([#cmp{src=#x86_imm{value=0}, dst=Dst=#x86_temp{}}|Insns],Res,Lst) ->
+  %% TEST leaves the adjust flag undefined, whereas CMP sets it properly (in
+  %% this case to 0). However, since HiPE does not use any instructions that
+  %% read the adjust flag, we can do this transform safely.
+  peep(Insns, [#test{src=Dst, dst=Dst} | Res], [elimCmp0_1|Lst]);
+peep([#cmp{src=Src=#x86_temp{}, dst=#x86_imm{value=0}},
+      J=#jcc{cc=Cond}|Insns],Res,Lst)
+  when Cond =:= 'e'; Cond =:= 'ne' -> % We're commuting the comparison
+  peep(Insns, [J, #test{src=Src, dst=Src} | Res], [elimCmp0_2|Lst]);
 
 %% ElimCmpTest
 %% -----------
@@ -187,6 +183,18 @@ peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) ->
 	    peep(Insns, [B|Res], Lst)
     end;
 
+%% LeaToAdd
+%% This rule transforms lea into add when the destination is the same as one of
+%% the operands. Sound because lea is never used where the condition codes are
+%% live (and would be clobbered by add).
+%% ----------
+peep([#lea{mem=#x86_mem{base=#x86_temp{reg=DstR},off=Src},
+	   temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) ->
+     peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]);
+peep([#lea{mem=#x86_mem{base=Src,off=#x86_temp{reg=DstR}},
+	   temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) ->
+     peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]);
+
 %% SubToDec
 %% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which
 %% changes reduction counter tests to use decl instead of subl.
@@ -209,6 +217,11 @@ trivial_goto_elimination(Insns) -> goto_elim(Insns, []).
 
 goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) ->
   goto_elim([I|Insns], Res);
+goto_elim([#jcc{cc=CC, label=Label} = IJCC,
+	   #jmp_label{label=BranchTgt},
+	   #label{label=Label} = ILBL|Insns], Res) ->
+  goto_elim([IJCC#jcc{cc=hipe_x86:neg_cc(CC), label=BranchTgt},
+	     ILBL|Insns], Res);
 goto_elim([I | Insns], Res) ->
   goto_elim(Insns, [I|Res]);
 goto_elim([], Res) ->
diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl
index ff26a31877..942201a051 100644
--- a/lib/hipe/x86/hipe_x86_pp.erl
+++ b/lib/hipe/x86/hipe_x86_pp.erl
@@ -188,6 +188,12 @@ pp_insn(Dev, I, Pre) ->
       io:format(Dev, ", ", []),
       pp_dst(Dev, Dst),
       io:format(Dev, "\n", []);
+    #test{src=Src, dst=Dst} ->
+      io:format(Dev, "\ttest ", []),
+      pp_src(Dev, Src),
+      io:format(Dev, ", ", []),
+      pp_dst(Dev, Dst),
+      io:format(Dev, "\n", []);
     #fp_binop{src=Src, dst=Dst, op=Op} ->
       io:format(Dev, "\t~s ", [Op]),
       pp_dst(Dev, Dst),
diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl
index edfd7b332c..1fd617570a 100644
--- a/lib/hipe/x86/hipe_x86_ra_finalise.erl
+++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl
@@ -162,6 +162,10 @@ ra_insn(I, Map, FpMap) ->
       Src = ra_opnd(Src0, Map),
       Dst = ra_opnd(Dst0, Map),
       I#shift{src=Src,dst=Dst};
+    #test{src=Src0,dst=Dst0} ->
+      Src = ra_opnd(Src0, Map),
+      Dst = ra_opnd(Dst0, Map),
+      I#test{src=Src,dst=Dst};
     _ ->
       exit({?MODULE,ra_insn,I})
   end.
diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl
index 35de692e07..9371e4b1a5 100644
--- a/lib/hipe/x86/hipe_x86_ra_naive.erl
+++ b/lib/hipe/x86/hipe_x86_ra_naive.erl
@@ -100,6 +100,8 @@ do_insn(I) ->	% Insn -> Insn list
       do_fp_binop(I);
     #shift{} ->
       do_shift(I);
+    #test{} ->
+      do_test(I);
     #label{} ->
       [I];
     #pseudo_jcc{} ->
@@ -310,6 +312,11 @@ do_shift(I) ->
       FixDst ++ [I#shift{dst=Dst}]
   end.
 
+do_test(Test) -> % Insn -> Insn list; operand fix-ups precede the test
+  #test{src=OldSrc, dst=OldDst} = Test,
+  {SrcFix, NewSrc, DstFix, NewDst} = do_binary(OldSrc, OldDst),
+  SrcFix ++ (DstFix ++ [Test#test{src=NewSrc, dst=NewDst}]).
+
 %%% Fix the operands of a binary op.
 %%% 1. remove pseudos from any explicit memory operands
 %%% 2. if both operands are (implicit or explicit) memory operands,
diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl
index f496b71828..e7c397b5b7 100644
--- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl
+++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl
@@ -83,6 +83,8 @@ do_insn(I, TempMap, Strategy) ->	% Insn -> {Insn list, DidSpill}
       do_fmove(I, TempMap, Strategy);
     #shift{} ->
       do_shift(I, TempMap, Strategy);
+    #test{} ->
+      do_test(I, TempMap, Strategy);
     _ ->
       %% comment, jmp*, label, pseudo_call, pseudo_jcc, pseudo_tailcall,
       %% pseudo_tailcall_prepare, push, ret
@@ -308,6 +310,14 @@ do_shift(I, TempMap, Strategy) ->
       {FixDst ++ [I#shift{dst=Dst}], DidSpill}
   end.
 
+%%% Fix a test op.
+
+do_test(Test, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill}
+  #test{src=OldSrc, dst=OldDst} = Test,
+  {SrcFix, NewSrc, DstFix, NewDst, Spilled} =
+    do_binary(OldSrc, OldDst, TempMap, Strategy),
+  {SrcFix ++ (DstFix ++ [Test#test{src=NewSrc, dst=NewDst}]), Spilled}.
+
 %%% Fix the operands of a binary op.
 %%% 1. remove pseudos from any explicit memory operands
 %%% 2. if both operands are (implicit or explicit) memory operands,