Merge branch 'margnus1/hipe/refactor-rtl/PR-1243'

* margnus1/hipe/refactor-rtl/PR-1243:
  hipe_x86: Fix encoding of test instr w/ neg imm
  hipe_tagscheme: Simplify test_two_fixnums with imm
  hipe_icode: Always const-propagate if&call args
  hipe_tagscheme: x86 lea+test for mask_and_compare
  hipe_tagscheme: Improve fixnum_addsub with imm
  hipe: Make realloc_binary fast case true branch
  hipe_x86_postpass: Negate conditions in goto elim
  hipe_tagscheme: Improve fixnum add on x86
  hipe_rtl_to_x86: Use LEA only for immediate adds
  hipe_x86: LeaToAdd peephole rule
  hipe_sparc: move coalescing
  hipe_ppc: better rlwinm pp
  hipe_ppc: move coalescing
  hipe_rtl: drop alub dest when unused
  hipe_rtl: unify branch and alub
  hipe_x86: Fix&activate ElimCmp0 peephole rule
  hipe_{x86,amd64}: Finish test instr implementation
author: Sverker Eriksson <[email protected]> 2016-11-22 12:02:07 +0100
committer: Sverker Eriksson <[email protected]> 2016-11-22 12:02:07 +0100
commit: 3d7b55f946162b5a129241dbe67397784a1ba1a5 (patch)
tree: 8a3809296bdfcdd16ebbf78975ea18034b22d62c
parent: 9491f6727f12e37241863bd5becbd1f336ff7659 (diff)
parent: fda3c9575d77bed0250f76f17e92d18836e15d0c (diff)
download: otp-3d7b55f946162b5a129241dbe67397784a1ba1a5.tar.gz
otp-3d7b55f946162b5a129241dbe67397784a1ba1a5.tar.bz2
otp-3d7b55f946162b5a129241dbe67397784a1ba1a5.zip
30 files changed, 598 insertions, 537 deletions
diff --git a/lib/hipe/amd64/hipe_amd64_encode.erl b/lib/hipe/amd64/hipe_amd64_encode.erl
index df15732cea..16bd705055 100644
--- a/lib/hipe/amd64/hipe_amd64_encode.erl
+++ b/lib/hipe/amd64/hipe_amd64_encode.erl
@@ -63,7 +63,7 @@
 -export([% condition codes
 	 cc/1,
 	 % 8-bit registers
-	 %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0,
+	 %% al/0, cl/0, dl/0, bl/0,
 	 % 32-bit registers
 	 %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0,
 	 % operands
@@ -127,19 +127,15 @@ cc(g) -> ?CC_G.
 -define(CL, 2#001).
 -define(DL, 2#010).
 -define(BL, 2#011).
--define(AH, 2#100).
--define(CH, 2#101).
--define(DH, 2#110).
--define(BH, 2#111).
+-define(SPL, 2#100).
+-define(BPL, 2#101).
+-define(SIL, 2#110).
+-define(DIL, 2#111).
 
 %% al() -> ?AL.
 %% cl() -> ?CL.
 %% dl() -> ?DL.
 %% bl() -> ?BL.
-%% ah() -> ?AH.
-%% ch() -> ?CH.
-%% dh() -> ?DH.
-%% bh() -> ?BH.
 
 %%% 32-bit registers
 
@@ -208,6 +204,7 @@ rex_([]) -> 0;
 rex_([{r8, Reg8}| Rest]) ->             % 8 bit registers
     case Reg8 of
 	{rm_mem, _} -> rex_(Rest);
+	{rm_reg, R} -> rex_([{r8, R} | Rest]);
 	4 -> (1 bsl 8) bor rex_(Rest);
 	5 -> (1 bsl 8) bor rex_(Rest);
 	6 -> (1 bsl 8) bor rex_(Rest);
@@ -825,12 +822,26 @@ shd_op_encode(Opcode, Opnds) ->
 
 test_encode(Opnds) ->
     case Opnds of
+	{al, {imm8,Imm8}} ->
+	    [16#A8, Imm8];
+	{ax, {imm16,Imm16}} ->
+	    [?PFX_OPND_16BITS, 16#A9 | le16(Imm16, [])];
 	{eax, {imm32,Imm32}} ->
 	    [16#A9 | le32(Imm32, [])];
+	{rax, {imm32,Imm32}} ->
+	    [rex([{w,1}]), 16#A9 | le32(Imm32, [])];
+	{{rm8,RM8}, {imm8,Imm8}} ->
+	    [rex([{r8,RM8}]), 16#F6 | encode_rm(RM8, 2#000, [Imm8])];
+	{{rm16,RM16}, {imm16,Imm16}} ->
+	    [?PFX_OPND_16BITS, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))];
 	{{rm32,RM32}, {imm32,Imm32}} ->
 	    [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))];
+	{{rm64,RM64}, {imm32,Imm32}} ->
+	    [rex([{w,1}]), 16#F7 | encode_rm(RM64, 2#000, le32(Imm32, []))];
 	{{rm32,RM32}, {reg32,Reg32}} ->
-	    [16#85 | encode_rm(RM32, Reg32, [])]
+	    [16#85 | encode_rm(RM32, Reg32, [])];
+	{{rm64,RM64}, {reg64,Reg64}} ->
+	    [rex([{w,1}]), 16#85 | encode_rm(RM64, Reg64, [])]
     end.
 
 %% test_sizeof(Opnds) ->
@@ -1309,18 +1320,21 @@ dotest1(OS) ->
     Imm32 = {imm32,Word32},
     Imm16 = {imm16,Word16},
     Imm8 = {imm8,Word8},
+    RM64 = {rm64,rm_reg(?EDX)},
     RM32 = {rm32,rm_reg(?EDX)},
     RM16 = {rm16,rm_reg(?EDX)},
     RM8 = {rm8,rm_reg(?EDX)},
+    RM8REX = {rm8,rm_reg(?SIL)},
     Rel32 = {rel32,Word32},
     Rel8 = {rel8,Word8},
     Moffs32 = {moffs32,Word32},
     Moffs16 = {moffs16,Word32},
     Moffs8 = {moffs8,Word32},
     CC = {cc,?CC_G},
+    Reg64 = {reg64,?EAX},
     Reg32 = {reg32,?EAX},
     Reg16 = {reg16,?EAX},
-    Reg8 = {reg8,?AH},
+    Reg8 = {reg8,?SPL},
     EA = {ea,ea_base(?ECX)},
     % exercise each instruction definition
     t(OS,'adc',{eax,Imm32}),
@@ -1465,9 +1479,17 @@ dotest1(OS) ->
     t(OS,'sub',{RM32,Imm8}),
     t(OS,'sub',{RM32,Reg32}),
     t(OS,'sub',{Reg32,RM32}),
+    t(OS,'test',{al,Imm8}),
+    t(OS,'test',{ax,Imm16}),
     t(OS,'test',{eax,Imm32}),
+    t(OS,'test',{rax,Imm32}),
+    t(OS,'test',{RM8,Imm8}),
+    t(OS,'test',{RM8REX,Imm8}),
+    t(OS,'test',{RM16,Imm16}),
     t(OS,'test',{RM32,Imm32}),
+    t(OS,'test',{RM64,Imm32}),
     t(OS,'test',{RM32,Reg32}),
+    t(OS,'test',{RM64,Reg64}),
     t(OS,'xor',{eax,Imm32}),
     t(OS,'xor',{RM32,Imm32}),
     t(OS,'xor',{RM32,Imm8}),
diff --git a/lib/hipe/arm/hipe_rtl_to_arm.erl b/lib/hipe/arm/hipe_rtl_to_arm.erl
index 2f9181d517..c964c222aa 100644
--- a/lib/hipe/arm/hipe_rtl_to_arm.erl
+++ b/lib/hipe/arm/hipe_rtl_to_arm.erl
@@ -62,7 +62,6 @@ conv_insn(I, Map, Data) ->
   case I of
     #alu{} -> conv_alu(I, Map, Data);
     #alub{} -> conv_alub(I, Map, Data);
-    #branch{} -> conv_branch(I, Map, Data);
     #call{} -> conv_call(I, Map, Data);
     #comment{} -> conv_comment(I, Map, Data);
     #enter{} -> conv_enter(I, Map, Data);
@@ -111,6 +110,17 @@ commute_arithop(ArithOp) ->
     _ -> ArithOp
   end.
 
+conv_cmpop('add') -> 'cmn';
+conv_cmpop('sub') -> 'cmp';
+conv_cmpop('and') -> 'tst';
+conv_cmpop('xor') -> 'teq';
+conv_cmpop(_) -> none.
+
+cmpop_commutes('cmp') -> false;
+cmpop_commutes('cmn') -> true;
+cmpop_commutes('tst') -> true;
+cmpop_commutes('teq') -> true.
+
 mk_alu(S, Dst, Src1, RtlAluOp, Src2) ->
   case hipe_rtl:is_shift_op(RtlAluOp) of
     true ->
@@ -223,71 +233,77 @@ fix_aluop_imm(AluOp, Imm) -> % {FixAm1,NewAluOp,Am1}
 
 conv_alub(I, Map, Data) ->
   %% dst = src1 aluop src2; if COND goto label
-  {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
-  {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
-  {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
+  {Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map),
+  {Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0),
   RtlAluOp = hipe_rtl:alub_op(I),
-  Cond0 = conv_alub_cond(RtlAluOp, hipe_rtl:alub_cond(I)),
-  Cond =
-    case {RtlAluOp,Cond0} of
-      {'mul','vs'} -> 'ne';	% overflow becomes not-equal
-      {'mul','vc'} -> 'eq';	% no-overflow becomes equal
-      {'mul',_} -> exit({?MODULE,I});
-      {_,_} -> Cond0
-    end,
-  I2 = mk_pseudo_bc(
-	  Cond,
-	  hipe_rtl:alub_true_label(I),
-	  hipe_rtl:alub_false_label(I),
-	  hipe_rtl:alub_pred(I)),
-  S = true,
-  I1 = mk_alu(S, Dst, Src1, RtlAluOp, Src2),
-  {I1 ++ I2, Map2, Data}.
-
-conv_branch(I, Map, Data) ->
-  %% <unused> = src1 - src2; if COND goto label
-  {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
-  {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
-  Cond = conv_branch_cond(hipe_rtl:branch_cond(I)),
-  I2 = mk_branch(Src1, Cond, Src2,
-		 hipe_rtl:branch_true_label(I),
-		 hipe_rtl:branch_false_label(I),
-		 hipe_rtl:branch_pred(I)),
-  {I2, Map1, Data}.
+  RtlCond = hipe_rtl:alub_cond(I),
+  HasDst = hipe_rtl:alub_has_dst(I),
+  CmpOp = conv_cmpop(RtlAluOp),
+  Cond0 = conv_alub_cond(RtlAluOp, RtlCond),
+  case (not HasDst) andalso CmpOp =/= none of
+    true ->
+      I1 = mk_branch(Src1, CmpOp, Src2, Cond0,
+		     hipe_rtl:alub_true_label(I),
+		     hipe_rtl:alub_false_label(I),
+		     hipe_rtl:alub_pred(I)),
+      {I1, Map1, Data};
+    false ->
+      {Dst, Map2} =
+	case HasDst of
+	  false -> {new_untagged_temp(), Map1};
+	  true -> conv_dst(hipe_rtl:alub_dst(I), Map1)
+	end,
+      Cond =
+	case {RtlAluOp,Cond0} of
+	  {'mul','vs'} -> 'ne';	% overflow becomes not-equal
+	  {'mul','vc'} -> 'eq';	% no-overflow becomes equal
+	  {'mul',_} -> exit({?MODULE,I});
+	  {_,_} -> Cond0
+	end,
+      I2 = mk_pseudo_bc(
+	     Cond,
+	     hipe_rtl:alub_true_label(I),
+	     hipe_rtl:alub_false_label(I),
+	     hipe_rtl:alub_pred(I)),
+      S = true,
+      I1 = mk_alu(S, Dst, Src1, RtlAluOp, Src2),
+      {I1 ++ I2, Map2, Data}
+  end.
 
-mk_branch(Src1, Cond, Src2, TrueLab, FalseLab, Pred) ->
+mk_branch(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred) ->
   case hipe_arm:is_temp(Src1) of
     true ->
       case hipe_arm:is_temp(Src2) of
 	true ->
-	  mk_branch_rr(Src1, Src2, Cond, TrueLab, FalseLab, Pred);
+	  mk_branch_rr(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred);
 	_ ->
-	  mk_branch_ri(Src1, Cond, Src2, TrueLab, FalseLab, Pred)
+	  mk_branch_ri(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred)
       end;
     _ ->
       case hipe_arm:is_temp(Src2) of
 	true ->
-	  NewCond = commute_cond(Cond),
-	  mk_branch_ri(Src2, NewCond, Src1, TrueLab, FalseLab, Pred);
+	  NewCond =
+	    case cmpop_commutes(CmpOp) of
+	      true -> Cond;
+	      false ->  commute_cond(Cond)
+	    end,
+	  mk_branch_ri(Src2, CmpOp, Src1, NewCond, TrueLab, FalseLab, Pred);
 	_ ->
-	  mk_branch_ii(Src1, Cond, Src2, TrueLab, FalseLab, Pred)
+	  mk_branch_ii(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred)
       end
   end.
 
-mk_branch_ii(Imm1, Cond, Imm2, TrueLab, FalseLab, Pred) ->
+mk_branch_ii(Imm1, CmpOp, Imm2, Cond, TrueLab, FalseLab, Pred) ->
   Tmp = new_untagged_temp(),
   mk_li(Tmp, Imm1,
-	mk_branch_ri(Tmp, Cond, Imm2,
+	mk_branch_ri(Tmp, CmpOp, Imm2, Cond,
 		     TrueLab, FalseLab, Pred)).
 
-mk_branch_ri(Src, Cond, Imm, TrueLab, FalseLab, Pred) ->
-  {FixAm1,NewCmpOp,Am1} = fix_aluop_imm('cmp', Imm),
-  FixAm1 ++ mk_cmp_bc(NewCmpOp, Src, Am1, Cond, TrueLab, FalseLab, Pred).
-
-mk_branch_rr(Src1, Src2, Cond, TrueLab, FalseLab, Pred) ->
-  mk_cmp_bc('cmp', Src1, Src2, Cond, TrueLab, FalseLab, Pred).
+mk_branch_ri(Src, CmpOp, Imm, Cond, TrueLab, FalseLab, Pred) ->
+  {FixAm1,NewCmpOp,Am1} = fix_aluop_imm(CmpOp, Imm),
+  FixAm1 ++ mk_branch_rr(Src, NewCmpOp, Am1, Cond, TrueLab, FalseLab, Pred).
 
-mk_cmp_bc(CmpOp, Src, Am1, Cond, TrueLab, FalseLab, Pred) ->
+mk_branch_rr(Src, CmpOp, Am1, Cond, TrueLab, FalseLab, Pred) ->
   [hipe_arm:mk_cmp(CmpOp, Src, Am1) |
    mk_pseudo_bc(Cond, TrueLab, FalseLab, Pred)].
 
@@ -637,6 +653,7 @@ conv_alub_cond(RtlAluOp, Cond) ->	% may be unsigned, depends on aluop
   case {RtlAluOp, Cond} of	% handle allowed alub unsigned conditions
     {'add', 'ltu'} -> 'hs';	% add+ltu == unsigned overflow == carry set == hs
     %% add more cases when needed
+    {'sub', _} -> conv_branch_cond(Cond);
     _ -> conv_cond(Cond)
   end.
 
diff --git a/lib/hipe/icode/hipe_icode.erl b/lib/hipe/icode/hipe_icode.erl
index 78508dff22..d2d08e0253 100644
--- a/lib/hipe/icode/hipe_icode.erl
+++ b/lib/hipe/icode/hipe_icode.erl
@@ -438,6 +438,7 @@
 	 if_true_label/1,
 	 if_false_label/1,
 	 if_args/1,
+	 if_args_update/2,
 	 if_pred/1,
 	 %% is_if/1,
 	 
@@ -594,6 +595,7 @@
 	 uses/1,
 	 defines/1,
 	 is_safe/1,
+	 reduce_unused/1,
 	 strip_comments/1,
 	 subst/2,
 	 subst_uses/2,
@@ -713,6 +715,9 @@ if_op_update(IF, NewOp) -> IF#icode_if{op=NewOp}.
 -spec if_args(#icode_if{}) -> [icode_term_arg()].
 if_args(#icode_if{args=Args}) -> Args.
 
+-spec if_args_update(#icode_if{}, [icode_term_arg()]) -> #icode_if{}.
+if_args_update(IF, Args) -> IF#icode_if{args=Args}.
+
 -spec if_true_label(#icode_if{}) -> icode_lbl().
 if_true_label(#icode_if{true_label=TrueLbl}) -> TrueLbl.
 
@@ -1765,6 +1770,18 @@ is_safe(Instr) ->
     #icode_end_try{} -> false
   end.
 
+%% @doc Produces a simplified instruction sequence that is equivalent to [Instr]
+%% under the assumption that all results of Instr are unused, or 'false' if
+%% there is no such sequence (other than [Instr] itself).
+
+-spec reduce_unused(icode_instr()) -> false | [icode_instr()].
+
+reduce_unused(Instr) ->
+  case is_safe(Instr) of
+    true -> [];
+    false -> false
+  end.
+
 %%-----------------------------------------------------------------------
 
 -spec highest_var(icode_instrs()) -> non_neg_integer().
diff --git a/lib/hipe/icode/hipe_icode_ssa_const_prop.erl b/lib/hipe/icode/hipe_icode_ssa_const_prop.erl
index 4ab4d7e95d..5d3d5413bc 100644
--- a/lib/hipe/icode/hipe_icode_ssa_const_prop.erl
+++ b/lib/hipe/icode/hipe_icode_ssa_const_prop.erl
@@ -97,11 +97,13 @@ visit_expression(Instruction, Environment) ->
       visit_begin_handler     (Instruction, EvaluatedArguments, Environment);
     #icode_begin_try{} ->
       visit_begin_try         (Instruction, EvaluatedArguments, Environment);
-    #icode_fail{} ->                
+    #icode_fail{} ->
       visit_fail              (Instruction, EvaluatedArguments, Environment);
-    _ ->
-      %% label, end_try, comment, return,
-      {[], [], Environment}
+    #icode_comment{} -> {[], [], Environment};
+    #icode_end_try{} -> {[], [], Environment};
+    #icode_enter{} ->   {[], [], Environment};
+    #icode_label{} ->   {[], [], Environment};
+    #icode_return{} ->  {[], [], Environment}
   end.
 
 %%-----------------------------------------------------------------------------
@@ -463,11 +465,15 @@ update_instruction(Instruction, Environment) ->
       update_type(Instruction, Environment);
     #icode_switch_tuple_arity{} ->
       update_switch_tuple_arity(Instruction, Environment);
-    _ ->
-      %% goto, comment, label, return, begin_handler, end_try,
-      %% begin_try, fail
-      %% We could but don't handle: catch?, fail?
-      [Instruction]
+    %% We could but don't handle: catch?, fail?
+    #icode_begin_handler{} -> [Instruction];
+    #icode_begin_try{} ->     [Instruction];
+    #icode_comment{} ->       [Instruction];
+    #icode_end_try{} ->       [Instruction];
+    #icode_fail{} ->          [Instruction];
+    #icode_goto{} ->          [Instruction];
+    #icode_label{} ->         [Instruction];
+    #icode_return{} ->        [Instruction]
   end.
 
 %%-----------------------------------------------------------------------------
@@ -502,14 +508,12 @@ update_call(Instruction, Environment) ->
 			  [Instruction, NewInstructions]),
 	  NewInstructions
       end;
-%%     %% [] ->  %% No destination; we don't touch this
-%%     [] -> 
-%%       NewArguments = update_arguments(hipe_icode:call_args(Instruction),
-%%                                       Environment),
-%%       [hipe_icode:call_args_update(Instruction, NewArguments)];
+    %% [] ->  %% No destination; we don't touch this
     %% List-> %% Means register allocation; not implemented at this point
     _ ->
-      [Instruction]
+      NewArguments = update_arguments(hipe_icode:call_args(Instruction),
+                                      Environment),
+      [hipe_icode:call_args_update(Instruction, NewArguments)]
   end.
 
 %%-----------------------------------------------------------------------------
@@ -574,7 +578,9 @@ update_if(Instruction, Environment) ->
       %% Convert the if-test to a type test if possible.
       Op = hipe_icode:if_op(Instruction),
       case Op =:= '=:=' orelse Op =:= '=/=' of
-	false -> [Instruction];
+	false ->
+	  [hipe_icode:if_args_update(
+	     Instruction, update_arguments(Args, Environment))];
 	true ->
 	  [Arg1, Arg2] = Args,
 	  case EvaluatedArguments of
@@ -604,8 +610,9 @@ conv_if_to_type(I, Const, Arg) when is_atom(Const);
   NewI = hipe_icode:mk_type([Arg], Test, T, F),
   ?CONST_PROP_MSG("if: ~w ---> type ~w\n", [I, NewI]),
   [NewI];
-conv_if_to_type(I, _, _) ->
-  [I].
+conv_if_to_type(I, Const, Arg) ->
+  %% Note: we are potentially commuting the (equality) comparison here
+  [hipe_icode:if_args_update(I, [Arg, hipe_icode:mk_const(Const)])].
 
 %%-----------------------------------------------------------------------------
 
diff --git a/lib/hipe/llvm/hipe_rtl_to_llvm.erl b/lib/hipe/llvm/hipe_rtl_to_llvm.erl
index 20813f8bd7..f8911c1909 100644
--- a/lib/hipe/llvm/hipe_rtl_to_llvm.erl
+++ b/lib/hipe/llvm/hipe_rtl_to_llvm.erl
@@ -156,9 +156,6 @@ translate_instr(I, Relocs, Data) ->
     #alub{} ->
       {I2, Relocs2} = trans_alub(I, Relocs),
       {I2, Relocs2, Data};
-    #branch{} ->
-      {I2, Relocs2} = trans_branch(I, Relocs),
-      {I2, Relocs2, Data};
     #call{} ->
       {I2, Relocs2} =
         case hipe_rtl:call_fun(I) of
@@ -255,7 +252,6 @@ trans_alub(I, Relocs) ->
 trans_alub_overflow(I, Sign, Relocs) ->
   {Src1, I1} = trans_src(hipe_rtl:alub_src1(I)),
   {Src2, I2} = trans_src(hipe_rtl:alub_src2(I)),
-  RtlDst = hipe_rtl:alub_dst(I),
   TmpDst = mk_temp(),
   Name = trans_alub_op(I, Sign),
   NewRelocs = relocs_store(Name, {call, remote, {llvm, Name, 2}}, Relocs),
@@ -266,7 +262,10 @@ trans_alub_overflow(I, Sign, Relocs) ->
 			                   [{WordTy, Src1}, {WordTy, Src2}], []),
   %% T1{0}: result of the operation
   I4 = hipe_llvm:mk_extractvalue(TmpDst, ReturnType, T1 , "0", []),
-  I5 = store_stack_dst(TmpDst, RtlDst),
+  I5 = case hipe_rtl:alub_has_dst(I) of
+	 false -> [];
+	 true -> store_stack_dst(TmpDst, hipe_rtl:alub_dst(I))
+       end,
   T2 = mk_temp(),
   %% T1{1}: Boolean variable indicating overflow
   I6 = hipe_llvm:mk_extractvalue(T2, ReturnType, T1, "1", []),
@@ -311,42 +310,35 @@ trans_alub_op(I, Sign) ->
   Name ++ Type.
 
 trans_alub_no_overflow(I, Relocs) ->
+  {Src1, I1} = trans_src(hipe_rtl:alub_src1(I)),
+  {Src2, I2} = trans_src(hipe_rtl:alub_src2(I)),
+  WordTy = hipe_llvm:mk_int(?BITS_IN_WORD),
   %% alu
-  T = hipe_rtl:mk_alu(hipe_rtl:alub_dst(I), hipe_rtl:alub_src1(I),
-                      hipe_rtl:alub_op(I), hipe_rtl:alub_src2(I)),
-  %% A trans_alu instruction cannot change relocations
-  {I1, _} = trans_alu(T, Relocs),
+  {CmpLhs, CmpRhs, I5, Cond} =
+    case {hipe_rtl:alub_has_dst(I), hipe_rtl:alub_op(I)} of
+      {false, 'sub'} ->
+	Cond0 = trans_branch_rel_op(hipe_rtl:alub_cond(I)),
+	{Src1, Src2, [], Cond0};
+      {HasDst, AlubOp} ->
+	TmpDst = mk_temp(),
+	Op = trans_op(AlubOp),
+	I3 = hipe_llvm:mk_operation(TmpDst, Op, WordTy, Src1, Src2, []),
+	I4 = case HasDst of
+	       false -> [];
+	       true -> store_stack_dst(TmpDst, hipe_rtl:alub_dst(I))
+	     end,
+	Cond0 = trans_alub_rel_op(hipe_rtl:alub_cond(I)),
+	{TmpDst, "0", [I4, I3], Cond0}
+    end,
   %% icmp
-  %% Translate destination as src, to match with the semantics of instruction
-  {Dst, I2} = trans_src(hipe_rtl:alub_dst(I)),
-  Cond = trans_rel_op(hipe_rtl:alub_cond(I)),
   T3 = mk_temp(),
-  WordTy = hipe_llvm:mk_int(?BITS_IN_WORD),
-  I5 = hipe_llvm:mk_icmp(T3, Cond, WordTy, Dst, "0"),
+  I6 = hipe_llvm:mk_icmp(T3, Cond, WordTy, CmpLhs, CmpRhs),
   %% br
   Metadata = branch_metadata(hipe_rtl:alub_pred(I)),
   True_label = mk_jump_label(hipe_rtl:alub_true_label(I)),
   False_label = mk_jump_label(hipe_rtl:alub_false_label(I)),
-  I6 = hipe_llvm:mk_br_cond(T3, True_label, False_label, Metadata),
-  {[I6, I5, I2, I1], Relocs}.
-
-%%
-%% branch
-%%
-trans_branch(I, Relocs) ->
-  {Src1, I1} = trans_src(hipe_rtl:branch_src1(I)),
-  {Src2, I2} = trans_src(hipe_rtl:branch_src2(I)),
-  Cond = trans_rel_op(hipe_rtl:branch_cond(I)),
-  %% icmp
-  T1 = mk_temp(),
-  WordTy = hipe_llvm:mk_int(?BITS_IN_WORD),
-  I3 = hipe_llvm:mk_icmp(T1, Cond, WordTy, Src1, Src2),
-  %% br
-  True_label = mk_jump_label(hipe_rtl:branch_true_label(I)),
-  False_label = mk_jump_label(hipe_rtl:branch_false_label(I)),
-  Metadata = branch_metadata(hipe_rtl:branch_pred(I)),
-  I4 = hipe_llvm:mk_br_cond(T1, True_label, False_label, Metadata),
-  {[I4, I3, I2, I1], Relocs}.
+  I7 = hipe_llvm:mk_br_cond(T3, True_label, False_label, Metadata),
+  {[I7, I6, I5, I2, I1], Relocs}.
 
 branch_metadata(X) when X =:= 0.5 -> [];
 branch_metadata(X) when X > 0.5 -> ?BRANCH_META_TAKEN;
@@ -1162,7 +1154,7 @@ trans_dst(A) ->
 		       true ->
 			 "%DL" ++ integer_to_list(hipe_rtl:const_label_label(A)) ++ "_var";
 		       false ->
-			 exit({?MODULE, trans_dst, {"Bad RTL argument",A}})
+			 error(badarg, [A])
 		     end
 		 end
 	     end,
@@ -1260,14 +1252,19 @@ trans_op(Op) ->
     Other -> exit({?MODULE, trans_op, {"Unknown RTL operator", Other}})
   end.
 
-trans_rel_op(Op) ->
+trans_branch_rel_op(Op) ->
   case Op of
-    eq -> eq;
-    ne -> ne;
     gtu -> ugt;
     geu -> uge;
     ltu -> ult;
     leu -> ule;
+    _ -> trans_alub_rel_op(Op)
+  end.
+
+trans_alub_rel_op(Op) ->
+  case Op of
+    eq -> eq;
+    ne -> ne;
     gt -> sgt;
     ge -> sge;
     lt -> slt;
@@ -1300,7 +1297,10 @@ insn_dst(I) ->
     #alu{} ->
       [hipe_rtl:alu_dst(I)];
     #alub{} ->
-      [hipe_rtl:alub_dst(I)];
+      case hipe_rtl:alub_has_dst(I) of
+	true -> [hipe_rtl:alub_dst(I)];
+	false -> []
+      end;
     #call{} ->
       case hipe_rtl:call_dstlist(I) of
         [] -> [];
diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl
index 8d37159ad8..58924409a8 100644
--- a/lib/hipe/ppc/hipe_ppc_frame.erl
+++ b/lib/hipe/ppc/hipe_ppc_frame.erl
@@ -98,7 +98,10 @@ do_pseudo_move(I, Context, FPoff) ->
 	  Offset = pseudo_offset(Src, FPoff, Context),
 	  mk_load(hipe_ppc:ldop_word(), Dst, Offset, mk_sp(), []);
 	_ ->
-	  [hipe_ppc:mk_alu('or', Dst, Src, Src)]
+	  case hipe_ppc:temp_reg(Dst) =:= hipe_ppc:temp_reg(Src) of
+	    true -> [];
+	    false -> [hipe_ppc:mk_alu('or', Dst, Src, Src)]
+	  end
       end
   end.
 
diff --git a/lib/hipe/ppc/hipe_ppc_pp.erl b/lib/hipe/ppc/hipe_ppc_pp.erl
index e69e6b64a2..0ff7a76bce 100644
--- a/lib/hipe/ppc/hipe_ppc_pp.erl
+++ b/lib/hipe/ppc/hipe_ppc_pp.erl
@@ -170,6 +170,12 @@ pp_insn(Dev, I, Pre) ->
       io:format(Dev, ", ", []),
       pp_temp(Dev, Base2),
       io:format(Dev, "\n", []);
+    #unary{unop={UnOp,I1,I2,I3}, dst=Dst, src=Src} ->
+      io:format(Dev, "\t~s ", [UnOp]),
+      pp_temp(Dev, Dst),
+      io:format(Dev, ", ", []),
+      pp_temp(Dev, Src),
+      io:format(Dev, ", ~s, ~s, ~s\n", [to_hex(I1),to_hex(I2),to_hex(I3)]);
     #unary{unop=UnOp, dst=Dst, src=Src} ->
       io:format(Dev, "\t~w ", [unop_name(UnOp)]),
       pp_temp(Dev, Dst),
diff --git a/lib/hipe/ppc/hipe_rtl_to_ppc.erl b/lib/hipe/ppc/hipe_rtl_to_ppc.erl
index a01e67a789..09f1ce5a49 100644
--- a/lib/hipe/ppc/hipe_rtl_to_ppc.erl
+++ b/lib/hipe/ppc/hipe_rtl_to_ppc.erl
@@ -80,7 +80,6 @@ conv_insn(I, Map, Data) ->
   case I of
     #alu{} -> conv_alu(I, Map, Data);
     #alub{} -> conv_alub(I, Map, Data);
-    #branch{} -> conv_branch(I, Map, Data);
     #call{} -> conv_call(I, Map, Data);
     #comment{} -> conv_comment(I, Map, Data);
     #enter{} -> conv_enter(I, Map, Data);
@@ -441,36 +440,53 @@ mk_alu_rr(Dst, Src1, RtlAluOp, Src2) ->
 
 conv_alub(I, Map, Data) ->
   %% dst = src1 aluop src2; if COND goto label
-  {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
-  {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
-  {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
-  {AluOp, BCond} =
-    case {hipe_rtl:alub_op(I), hipe_rtl:alub_cond(I)} of
-      {'add', 'ltu'} ->
-	{'addc', 'eq'};
-      {RtlAlubOp, RtlAlubCond} ->
-	{conv_alub_op(RtlAlubOp), conv_alub_cond(RtlAlubCond)}
-    end,
-  BC = mk_pseudo_bc(BCond,
-		    hipe_rtl:alub_true_label(I),
-		    hipe_rtl:alub_false_label(I),
-		    hipe_rtl:alub_pred(I)),
-  I2 =
-    case {AluOp, BCond} of
-      {'addc', 'eq'} ->	% copy XER[CA] to CR0[EQ] before the BC
-	TmpR = new_untagged_temp(),
-	[hipe_ppc:mk_mfspr(TmpR, 'xer'),
-	 hipe_ppc:mk_mtcr(TmpR) |
-	 BC];
-      _ -> BC
-    end,
-  {NewSrc1, NewSrc2} =
-    case AluOp of
-      'subf' -> {Src2, Src1};
-      _ -> {Src1, Src2}
-    end,
-  I1 = mk_alub(Dst, NewSrc1, AluOp, NewSrc2, BCond),
-  {I1 ++ I2, Map2, Data}.
+  HasDst = hipe_rtl:alub_has_dst(I),
+  {Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map),
+  {Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0),
+  RtlAlubOp = hipe_rtl:alub_op(I),
+  RtlAlubCond = hipe_rtl:alub_cond(I),
+  case {HasDst, RtlAlubOp} of
+    {false, sub} ->
+      {BCond,Sign} = conv_branch_cond(RtlAlubCond),
+      I2 = mk_branch(Src1, BCond, Sign, Src2,
+		     hipe_rtl:alub_true_label(I),
+		     hipe_rtl:alub_false_label(I),
+		     hipe_rtl:alub_pred(I)),
+      {I2, Map1, Data};
+    _ ->
+      {Dst, Map2} =
+	case HasDst of
+	  false -> {new_untagged_temp(), Map1};
+	  true -> conv_dst(hipe_rtl:alub_dst(I), Map1)
+	end,
+      {AluOp, BCond} =
+	case {RtlAlubOp, RtlAlubCond} of
+	  {'add', 'ltu'} ->
+	    {'addc', 'eq'};
+	  {_, _} ->
+	    {conv_alub_op(RtlAlubOp), conv_alub_cond(RtlAlubCond)}
+	end,
+      BC = mk_pseudo_bc(BCond,
+			hipe_rtl:alub_true_label(I),
+			hipe_rtl:alub_false_label(I),
+			hipe_rtl:alub_pred(I)),
+      I2 =
+	case {AluOp, BCond} of
+	  {'addc', 'eq'} ->	% copy XER[CA] to CR0[EQ] before the BC
+	    TmpR = new_untagged_temp(),
+	    [hipe_ppc:mk_mfspr(TmpR, 'xer'),
+	     hipe_ppc:mk_mtcr(TmpR) |
+	     BC];
+	  _ -> BC
+	end,
+      {NewSrc1, NewSrc2} =
+	case AluOp of
+	  'subf' -> {Src2, Src1};
+	  _ -> {Src1, Src2}
+	end,
+      I1 = mk_alub(Dst, NewSrc1, AluOp, NewSrc2, BCond),
+      {I1 ++ I2, Map2, Data}
+  end.
 
 conv_alub_op(RtlAluOp) ->
   case {get(hipe_target_arch), RtlAluOp} of
@@ -689,17 +705,6 @@ mk_alub_rr_Rc(Dst, Src1, AluOp, Src2) ->
     end,
   [hipe_ppc:mk_alu(AluOpDot, Dst, Src1, Src2)].
 
-conv_branch(I, Map, Data) ->
-  %% <unused> = src1 - src2; if COND goto label
-  {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
-  {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
-  {BCond,Sign} = conv_branch_cond(hipe_rtl:branch_cond(I)),
-  I2 = mk_branch(Src1, BCond, Sign, Src2,
-		 hipe_rtl:branch_true_label(I),
-		 hipe_rtl:branch_false_label(I),
-		 hipe_rtl:branch_pred(I)),
-  {I2, Map1, Data}.
-
 conv_branch_cond(Cond) -> % may be unsigned
   case Cond of
     gtu -> {'gt', 'unsigned'};
diff --git a/lib/hipe/rtl/hipe_rtl.erl b/lib/hipe/rtl/hipe_rtl.erl
index 0726827299..d39969a0ed 100644
--- a/lib/hipe/rtl/hipe_rtl.erl
+++ b/lib/hipe/rtl/hipe_rtl.erl
@@ -187,18 +187,14 @@
 
 	 mk_branch/5,
 	 mk_branch/6,
-	 branch_src1/1,
-	 branch_src2/1,
-	 branch_cond/1,
-	 branch_true_label/1,
-	 branch_false_label/1,
-	 branch_pred/1,
+	 mk_branch/7,
 	 %% is_branch/1,
 	 %% branch_true_label_update/2,
 	 %% branch_false_label_update/2,
 
 	 mk_alub/7,
 	 mk_alub/8,
+	 alub_has_dst/1,
 	 alub_dst/1,
 	 alub_src1/1,
 	 alub_op/1,
@@ -338,6 +334,7 @@
 	 defines/1,
 	 redirect_jmp/3,
 	 is_safe/1,
+	 reduce_unused/1,
 	 %% highest_var/1,
 	 pp/1,
 	 pp/2,
@@ -588,37 +585,25 @@ is_label(#label{}) -> true;
 is_label(_) -> false.
 
 %%
-%% branch
-%%
-
-mk_branch(Src1, Op, Src2, True, False) ->
-  mk_branch(Src1, Op, Src2, True, False, 0.5).
-mk_branch(Src1, Op, Src2, True, False, P) ->
-  #branch{src1=Src1, 'cond'=Op, src2=Src2, true_label=True,
-	  false_label=False, p=P}.
-branch_src1(#branch{src1=Src1}) -> Src1.
-branch_src1_update(Br, NewSrc) -> Br#branch{src1=NewSrc}.
-branch_src2(#branch{src2=Src2}) -> Src2.
-branch_src2_update(Br, NewSrc) -> Br#branch{src2=NewSrc}.
-branch_cond(#branch{'cond'=Cond}) -> Cond.
-branch_true_label(#branch{true_label=TrueLbl}) -> TrueLbl.
-branch_true_label_update(Br, NewTrue) -> Br#branch{true_label=NewTrue}.
-branch_false_label(#branch{false_label=FalseLbl}) -> FalseLbl.
-branch_false_label_update(Br, NewFalse) -> Br#branch{false_label=NewFalse}.
-branch_pred(#branch{p=P}) -> P.
-
-%%
 %% alub
 %%
 
 -type alub_cond() :: 'eq' | 'ne' | 'ge' | 'geu' | 'gt' | 'gtu' | 'le'
                    | 'leu' | 'lt' | 'ltu' | 'overflow' | 'not_overflow'.
 
+mk_branch(Src1, Cond, Src2, True, False) ->
+  mk_branch(Src1, Cond, Src2, True, False, 0.5).
+mk_branch(Src1, Cond, Src2, True, False, P) ->
+  mk_branch(Src1, 'sub', Src2, Cond, True, False, P).
+mk_branch(Src1, Op, Src2, Cond, True, False, P) ->
+  mk_alub([], Src1, Op, Src2, Cond, True, False, P).
+
 mk_alub(Dst, Src1, Op, Src2, Cond, True, False) ->
   mk_alub(Dst, Src1, Op, Src2, Cond, True, False, 0.5).
 mk_alub(Dst, Src1, Op, Src2, Cond, True, False, P) ->
   #alub{dst=Dst, src1=Src1, op=Op, src2=Src2, 'cond'=Cond,
 	true_label=True, false_label=False, p=P}.
+alub_has_dst(#alub{dst=Dst}) -> Dst =/= [].
 alub_dst(#alub{dst=Dst}) -> Dst.
 alub_dst_update(A, NewDst) -> A#alub{dst=NewDst}.
 alub_src1(#alub{src1=Src1}) -> Src1.
@@ -943,8 +928,7 @@ args(I) ->
   case I of
     #alu{} -> [alu_src1(I), alu_src2(I)];
     #alub{} -> [alub_src1(I), alub_src2(I)];
-    #branch{} -> [branch_src1(I), branch_src2(I)];
-    #call{} -> 
+    #call{} ->
       Args = call_arglist(I) ++ hipe_rtl_arch:call_used(),
       case call_is_known(I) of
 	false -> [call_fun(I) | Args];
@@ -987,8 +971,8 @@ args(I) ->
 defines(Instr) ->
   Defs = case Instr of
 	   #alu{} -> [alu_dst(Instr)];
+	   #alub{dst=[]} -> [];
 	   #alub{} -> [alub_dst(Instr)];
-	   #branch{} -> [];
 	   #call{} -> call_dstlist(Instr) ++ hipe_rtl_arch:call_defined();
 	   #comment{} -> [];
 	   #enter{} -> [];
@@ -1042,9 +1026,6 @@ subst_uses(Subst, I) ->
     #alub{} ->
       I0 = alub_src1_update(I, subst1(Subst, alub_src1(I))),
       alub_src2_update(I0, subst1(Subst, alub_src2(I)));
-    #branch{} ->
-      I0 = branch_src1_update(I, subst1(Subst, branch_src1(I))),
-      branch_src2_update(I0, subst1(Subst, branch_src2(I)));
     #call{} ->
       case call_is_known(I) of
 	false ->
@@ -1126,11 +1107,6 @@ subst_uses_llvm(Subst, I) ->
       {NewSrc1, _ } = subst1_llvm(Subst1, alub_src1(I)),
       I0 =  alub_src1_update(I, NewSrc1),
       alub_src2_update(I0, NewSrc2);
-    #branch{} ->
-      {NewSrc2, Subst1} = subst1_llvm(Subst, branch_src2(I)),
-      {NewSrc1, _ } = subst1_llvm(Subst1, branch_src1(I)),
-      I0 = branch_src1_update(I, NewSrc1),
-      branch_src2_update(I0, NewSrc2);
     #call{} ->
       case call_is_known(I) of
         false ->
@@ -1243,10 +1219,10 @@ subst_defines(Subst, I)->
   case I of
     #alu{} ->
       alu_dst_update(I, subst1(Subst, alu_dst(I)));
+    #alub{dst=[]} ->
+      I;
     #alub{} ->
       alub_dst_update(I, subst1(Subst, alub_dst(I)));
-    #branch{} ->
-      I;
     #call{} ->
       call_dstlist_update(I, subst_list(Subst, call_dstlist(I)));
     #comment{} ->
@@ -1313,7 +1289,6 @@ is_safe(Instr) ->
   case Instr of
     #alu{} -> true;
     #alub{} -> false;
-    #branch{} -> false;
     #call{} -> false;
     #comment{} -> false;
     #enter{} -> false;
@@ -1340,6 +1315,24 @@ is_safe(Instr) ->
     #switch{} -> false %% Maybe this is safe...
   end.
 
+%% @spec reduce_unused(rtl_instruction())
+%%           -> false | [rtl_instruction()].
+%%
+%% @doc Produces a simplified instruction sequence that is equivalent to [Instr]
+%% under the assumption that all results of Instr are unused, or 'false' if
+%% there is no such sequence (other than [Instr] itself).
+
+reduce_unused(Instr) ->
+  case Instr of
+    #alub{dst=Dst} when Dst =/= [] ->
+      [Instr#alub{dst=[]}];
+    _ ->
+      case is_safe(Instr) of
+	true -> [];
+	false -> false
+      end
+  end.
+
 %%
 %% True if argument is an alu-operator
 %%
@@ -1386,17 +1379,6 @@ redirect_jmp(Jmp, ToOld, ToNew) ->
   %% OBS: In a jmp instruction more than one labels may be identical
   %%      and thus need redirection!
   case Jmp of
-    #branch{} ->
-      TmpJmp = case branch_true_label(Jmp) of
-		 ToOld -> branch_true_label_update(Jmp, ToNew);
-		 _ -> Jmp
-	       end,
-      case branch_false_label(TmpJmp) of
-	ToOld ->
-	  branch_false_label_update(TmpJmp, ToNew);
-	_ ->
-	  TmpJmp
-      end;
     #switch{} ->
       NewLbls = [case Lbl =:= ToOld of
 		   true -> ToNew;
@@ -1591,13 +1573,6 @@ pp_instr(Dev, I) ->
       io:format(Dev, "~n", []);
     #label{} ->
       io:format(Dev, "L~w:~n", [label_name(I)]);
-    #branch{} ->
-      io:format(Dev, "    if (", []),
-      pp_arg(Dev, branch_src1(I)),
-      io:format(Dev, " ~w ", [branch_cond(I)]),
-      pp_arg(Dev, branch_src2(I)),
-      io:format(Dev, ") then L~w (~.2f) else L~w~n", 
-		[branch_true_label(I), branch_pred(I), branch_false_label(I)]);
     #switch{} ->
       io:format(Dev, "    switch (", []),
       pp_arg(Dev, switch_src(I)),
@@ -1606,7 +1581,10 @@ pp_instr(Dev, I) ->
       io:format(Dev, ">\n", []);
     #alub{} ->
       io:format(Dev, "    ", []),
-      pp_arg(Dev, alub_dst(I)),
+      case alub_has_dst(I) of
+	true -> pp_arg(Dev, alub_dst(I));
+	false -> io:format(Dev, "_", [])
+      end,
       io:format(Dev, " <- ", []),
       pp_arg(Dev, alub_src1(I)),
       io:format(Dev, " ~w ", [alub_op(I)]),
diff --git a/lib/hipe/rtl/hipe_rtl.hrl b/lib/hipe/rtl/hipe_rtl.hrl
index cc76e7e5c4..74020c6045 100644
--- a/lib/hipe/rtl/hipe_rtl.hrl
+++ b/lib/hipe/rtl/hipe_rtl.hrl
@@ -28,7 +28,6 @@
 
 -record(alu, {dst, src1, op, src2}).
 -record(alub, {dst, src1, op, src2, 'cond', true_label, false_label, p}).
--record(branch, {src1, src2, 'cond', true_label, false_label, p}).
 -record(call, {dstlist, 'fun', arglist, type, continuation,
     failcontinuation, normalcontinuation = []}).
 -record(comment, {text}).
diff --git a/lib/hipe/rtl/hipe_rtl_binary_construct.erl b/lib/hipe/rtl/hipe_rtl_binary_construct.erl
index 367d76b24d..baf5f7d27a 100644
--- a/lib/hipe/rtl/hipe_rtl_binary_construct.erl
+++ b/lib/hipe/rtl/hipe_rtl_binary_construct.erl
@@ -429,8 +429,8 @@ realloc_binary(SizeReg, ProcBin, Base) ->
    hipe_tagscheme:set_field_from_term(ProcBinFlagsTag, ProcBin, Flags),
    hipe_tagscheme:get_field_from_term(ProcBinValTag, ProcBin, BinPointer),
    hipe_tagscheme:get_field_from_pointer(BinOrigSizeTag, BinPointer, OrigSize),
-   hipe_rtl:mk_branch(OrigSize, 'ltu', ResultingSize,
-		      ReallocLblName, NoReallocLblName),
+   hipe_rtl:mk_branch(OrigSize, 'geu', ResultingSize, NoReallocLblName,
+		      ReallocLblName),
    NoReallocLbl,
    hipe_tagscheme:get_field_from_term(ProcBinBytesTag, ProcBin, Base),
    hipe_rtl:mk_goto(ContLblName),
@@ -757,9 +757,9 @@ test_alignment(SrcOffset, NumBits, Offset, AlignedCode, CCode) ->
   [AlignedLbl, CLbl] = create_lbls(2),
    [hipe_rtl:mk_alu(Tmp, SrcOffset, 'or', NumBits),
    hipe_rtl:mk_alu(Tmp, Tmp, 'or', Offset),
-   hipe_rtl:mk_alub(Tmp, Tmp, 'and', ?LOW_BITS, 'eq',
-		    hipe_rtl:label_name(AlignedLbl),
-		    hipe_rtl:label_name(CLbl)),
+   hipe_rtl:mk_branch(Tmp, 'and', ?LOW_BITS, 'eq',
+		      hipe_rtl:label_name(AlignedLbl),
+		      hipe_rtl:label_name(CLbl), 0.5),
    AlignedLbl,
    AlignedCode,
    CLbl,
@@ -1284,8 +1284,7 @@ is_divisible(Dividend, Divisor, SuccLbl, FailLbl) ->
     true -> %% Divisor is a power of 2
       %% Test that the Log2-1 lowest bits are clear
       Mask = hipe_rtl:mk_imm(Divisor - 1),
-      [Tmp] = create_regs(1),
-      [hipe_rtl:mk_alub(Tmp, Dividend, 'and', Mask, eq, SuccLbl, FailLbl, 0.99)];
+      [hipe_rtl:mk_branch(Dividend, 'and', Mask, eq, SuccLbl, FailLbl, 0.99)];
     false ->
       %% We need division, fall back to a primop
       [hipe_rtl:mk_call([], is_divisible, [Dividend, hipe_rtl:mk_imm(Divisor)],
diff --git a/lib/hipe/rtl/hipe_rtl_binary_match.erl b/lib/hipe/rtl/hipe_rtl_binary_match.erl
index d999cd2743..520b055ba7 100644
--- a/lib/hipe/rtl/hipe_rtl_binary_match.erl
+++ b/lib/hipe/rtl/hipe_rtl_binary_match.erl
@@ -659,9 +659,8 @@ test_alignment_code(Size, Unit, SLblName, FalseLblName) ->
   end.
 
 get_fast_test_code(Size, AndTest, SLblName, FalseLblName) ->
-  [Tmp] = create_gcsafe_regs(1),
-  [hipe_rtl:mk_alub(Tmp, Size, 'and', hipe_rtl:mk_imm(AndTest),
-		    'eq', SLblName, FalseLblName)].
+  [hipe_rtl:mk_branch(Size, 'and', hipe_rtl:mk_imm(AndTest), 'eq',
+		      SLblName, FalseLblName, 0.5)].
 
 %% This is really slow
 get_slow_test_code(Size, Unit, SLblName, FalseLblName) ->
diff --git a/lib/hipe/rtl/hipe_rtl_cfg.erl b/lib/hipe/rtl/hipe_rtl_cfg.erl
index f49e8f815f..e802b320c2 100644
--- a/lib/hipe/rtl/hipe_rtl_cfg.erl
+++ b/lib/hipe/rtl/hipe_rtl_cfg.erl
@@ -83,9 +83,7 @@ mk_goto(Name) ->
 
 branch_successors(Instr) ->
   case Instr of
-    #branch{} -> [hipe_rtl:branch_true_label(Instr), 
-		  hipe_rtl:branch_false_label(Instr)];
-    #alub{} -> [hipe_rtl:alub_true_label(Instr), 
+    #alub{} -> [hipe_rtl:alub_true_label(Instr),
 	        hipe_rtl:alub_false_label(Instr)];
     #switch{} -> hipe_rtl:switch_labels(Instr);
     #call{} -> 
@@ -106,7 +104,6 @@ fails_to(Instr) ->
 
 is_branch(Instr) ->
    case Instr of
-     #branch{} -> true;
      #alub{} -> true;
      #switch{} -> true;
      #goto{} -> true;
@@ -127,7 +124,7 @@ is_branch(Instr) ->
 
 is_pure_branch(Instr) ->
   case Instr of
-    #branch{} -> true;
+    #alub{} -> not hipe_rtl:alub_has_dst(Instr);
     #switch{} -> true;
     #goto{} -> true;
     _ -> false
diff --git a/lib/hipe/rtl/hipe_rtl_lcm.erl b/lib/hipe/rtl/hipe_rtl_lcm.erl
index 71bd06c0df..67ddd0f649 100644
--- a/lib/hipe/rtl/hipe_rtl_lcm.erl
+++ b/lib/hipe/rtl/hipe_rtl_lcm.erl
@@ -378,7 +378,6 @@ is_expr(I) ->
 %% 	  end;
 	       
         #alub{} -> false; %% TODO: Split instruction to consider alu expression?
-        #branch{} -> false;
         #call{} -> false; %% We cannot prove that a call has no side-effects
         #comment{} -> false;
         #enter{} -> false;
diff --git a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl
index 7158383010..f887eeab66 100644
--- a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl
+++ b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl
@@ -110,8 +110,6 @@ visit_expression(Instruction, Environment) ->
       visit_alu(Instruction, Environment);
     #alub{} ->
       visit_alub(Instruction, Environment);
-    #branch{} ->
-      visit_branch(Instruction, Environment);
     #call{} ->
       visit_call(Instruction, Environment);
 %%    #comment{} ->
@@ -184,42 +182,6 @@ set_to(Dst, Val, Env) ->
   {[], SSAWork, Env1}.
 
 %%-----------------------------------------------------------------------------
-%% Procedure : visit_branch/2
-%% Purpose   : do symbolic exection of branch instructions.
-%% Arguments : Inst - The instruction
-%%             Env  - The environment
-%% Returns   : { FlowWorkList, SSAWorkList, NewEnvironment}
-%%-----------------------------------------------------------------------------
-
-visit_branch(Inst, Env) -> %% Titta också på exekverbarflagga
-  Val1 = lookup_lattice_value(hipe_rtl:branch_src1(Inst), Env),
-  Val2 = lookup_lattice_value(hipe_rtl:branch_src2(Inst), Env),
-  CFGWL = case evaluate_relop(Val1, hipe_rtl:branch_cond(Inst), Val2) of
-            true   -> [hipe_rtl:branch_true_label(Inst)];
-            false  -> [hipe_rtl:branch_false_label(Inst)];
-            bottom -> [hipe_rtl:branch_true_label(Inst), 
-	               hipe_rtl:branch_false_label(Inst)];
-            top    -> []
-          end,
-  {CFGWL, [], Env}.
-
-%%-----------------------------------------------------------------------------
-%% Procedure : evaluate_relop/3
-%% Purpose   : evaluate the given relop. While taking care to handle top & 
-%%             bottom in some sane way.
-%% Arguments : Val1, Val2 - The operands Integers or top or bottom
-%%             RelOp  - some relop atom from rtl. 
-%% Returns   : bottom, top, true or false
-%%-----------------------------------------------------------------------------
-
-evaluate_relop(Val1, RelOp, Val2) ->
-  if 
-    (Val1==bottom) or (Val2==bottom) -> bottom ;
-    (Val1==top) or (Val2==top)       ->  top;
-    true ->  hipe_rtl_arch:eval_cond(RelOp, Val1, Val2)
-  end.
-
-%%-----------------------------------------------------------------------------
 %% Procedure : evaluate_fixnumop/2 
 %% Purpose   : try to evaluate a fixnumop.
 %% Arguments : Val1 - operand (an integer, 'top' or 'bottom')
@@ -408,6 +370,7 @@ partial_eval_branch(Cond, N0, Z0, V0, C0) ->
        Cond =:= 'ne'           -> {true, Z0,   true, true};
        Cond =:= 'gt';
        Cond =:= 'le'           -> {N0,   Z0,   V0,   true};
+       Cond =:= 'leu';
        Cond =:= 'gtu'          -> {true, Z0,   true, C0  };
        Cond =:= 'lt';
        Cond =:= 'ge'           -> {N0,   true, V0,   true};
@@ -450,7 +413,11 @@ visit_alub(Inst, Env) ->
           false  -> [hipe_rtl:alub_false_label(Inst)]
         end
      end,
-  {[], NewSSA, NewEnv} = set_to(hipe_rtl:alub_dst(Inst), NewVal,  Env),
+  {[], NewSSA, NewEnv} =
+    case hipe_rtl:alub_has_dst(Inst) of
+      false -> {[], [], Env};
+      true -> set_to(hipe_rtl:alub_dst(Inst), NewVal, Env)
+    end,
   {Labels, NewSSA, NewEnv}.
       
 %%-----------------------------------------------------------------------------
@@ -688,8 +655,6 @@ update_instruction(Inst, Env) ->
       update_alu(Inst, Env);
     #alub{} ->
       update_alub(Inst, Env);
-    #branch{} ->
-      update_branch(Inst, Env);
     #call{} ->
       subst_all_uses(Inst, Env);
 %%    #comment{} ->
@@ -902,33 +867,6 @@ update_alu(Inst, Env) ->
       {Val,_,_,_,_} = evaluate_alu(Val1, hipe_rtl:alu_op(Inst), Val2),
       [hipe_rtl:mk_move(hipe_rtl:alu_dst(Inst), hipe_rtl:mk_imm(Val))]
   end.
- 
-%%-----------------------------------------------------------------------------
-%% Procedure : update_branch/2
-%% Purpose   : update an branch-instruction
-%% Arguments : Inst - the instruction.
-%%             Env - in which everything happens.
-%% Returns   : list of new instruction
-%%-----------------------------------------------------------------------------
-
-update_branch(Inst, Env) ->
-  Src1 = hipe_rtl:branch_src1(Inst),
-  Src2 = hipe_rtl:branch_src2(Inst),
-  Val1 = lookup_lattice_value(Src1, Env),
-  Val2 = lookup_lattice_value(Src2, Env),
-  if
-    (Val1 =:= bottom) and (Val2 =:= bottom) ->
-      [Inst];
-    Val1 =:= bottom ->
-      [hipe_rtl:subst_uses([{Src2, hipe_rtl:mk_imm(Val2)}], Inst)];
-    Val2 =:= bottom -> 
-      [hipe_rtl:subst_uses([{Src1, hipe_rtl:mk_imm(Val1)}], Inst)];
-    true ->
-      case hipe_rtl_arch:eval_cond(hipe_rtl:branch_cond(Inst), Val1, Val2) of
-        true  -> [hipe_rtl:mk_goto(hipe_rtl:branch_true_label(Inst))];
-        false -> [hipe_rtl:mk_goto(hipe_rtl:branch_false_label(Inst))]
-      end
-  end.
 
 %%-----------------------------------------------------------------------------
 %% Procedure : update_alub/2
@@ -943,8 +881,12 @@ update_branch(Inst, Env) ->
 
 %% some small helpers.
 alub_to_move(Inst, Res, Lab) ->
-  [hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res),
-   hipe_rtl:mk_goto(Lab)].
+  Goto = [hipe_rtl:mk_goto(Lab)],
+  case hipe_rtl:alub_has_dst(Inst) of
+    false -> Goto;
+    true ->
+      [hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res) | Goto]
+  end.
 
 make_alub_subst_list(bottom, _, Tail) ->  Tail;
 make_alub_subst_list(top, Src, _) ->
diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl
index 8cf45772b5..5d11b9b82e 100644
--- a/lib/hipe/rtl/hipe_tagscheme.erl
+++ b/lib/hipe/rtl/hipe_tagscheme.erl
@@ -171,22 +171,21 @@ test_nil(X, TrueLab, FalseLab, Pred) ->
   hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(?NIL), TrueLab, FalseLab, Pred).
 
 test_cons(X, TrueLab, FalseLab, Pred) ->
-  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
   Mask = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_LIST),
-  hipe_rtl:mk_alub(Tmp, X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred).
+  hipe_rtl:mk_branch(X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred).
 
 test_is_boxed(X, TrueLab, FalseLab, Pred) ->
-  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
   Mask = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_BOXED),
-  hipe_rtl:mk_alub(Tmp, X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred).
+  hipe_rtl:mk_branch(X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred).
 
 get_header(Res, X) ->
   hipe_rtl:mk_load(Res, X, hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED))).
 
 mask_and_compare(X, Mask, Value, TrueLab, FalseLab, Pred) ->
   Tmp = hipe_rtl:mk_new_reg_gcsafe(),
-  [hipe_rtl:mk_alu(Tmp, X, 'and', hipe_rtl:mk_imm(Mask)),
-   hipe_rtl:mk_branch(Tmp, 'eq', hipe_rtl:mk_imm(Value), TrueLab, FalseLab, Pred)].
+  [hipe_rtl:mk_alu(Tmp, X, 'sub', hipe_rtl:mk_imm(Value)),
+   hipe_rtl:mk_branch(Tmp, 'and', hipe_rtl:mk_imm(Mask),
+		      eq, TrueLab, FalseLab, Pred)].
 
 test_immed1(X, Value, TrueLab, FalseLab, Pred) ->
   mask_and_compare(X, ?TAG_IMMED1_MASK, Value, TrueLab, FalseLab, Pred).
@@ -238,13 +237,12 @@ test_atom(X, TrueLab, FalseLab, Pred) ->
 
 test_tuple(X, TrueLab, FalseLab, Pred) ->
   Tmp = hipe_rtl:mk_new_reg_gcsafe(),
-  Tmp2 = hipe_rtl:mk_new_reg_gcsafe(),
   HalfTrueLab = hipe_rtl:mk_new_label(),
   [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred),
    HalfTrueLab,
    get_header(Tmp, X),
-   hipe_rtl:mk_alub(Tmp2, Tmp, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
-		    TrueLab, FalseLab, Pred)].
+   hipe_rtl:mk_branch(Tmp, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
+		      TrueLab, FalseLab, Pred)].
 
 test_tuple_N(X, N, TrueLab, FalseLab, Pred) ->
   Tmp = hipe_rtl:mk_new_reg_gcsafe(),
@@ -466,12 +464,17 @@ test_fixnums_1([Arg1, Arg2|Args], Acc) ->
 
 test_two_fixnums(Arg1, Arg2, FalseLab) ->
   TrueLab = hipe_rtl:mk_new_label(),
-  case hipe_rtl:is_imm(Arg2) of
+  case hipe_rtl:is_imm(Arg1) orelse hipe_rtl:is_imm(Arg2) of
     true ->
-      Value = hipe_rtl:imm_value(Arg2),
+      {Imm, Var} =
+	case hipe_rtl:is_imm(Arg1) of
+	  true  -> {Arg1, Arg2};
+	  false -> {Arg2, Arg1}
+	end,
+      Value = hipe_rtl:imm_value(Imm),
       case Value band ?TAG_IMMED1_MASK of
 	?TAG_IMMED1_SMALL ->
-	  [test_fixnum(Arg1, hipe_rtl:label_name(TrueLab), FalseLab, 0.99),
+	  [test_fixnum(Var, hipe_rtl:label_name(TrueLab), FalseLab, 0.99),
 	   TrueLab];
 	_ ->
 	  [hipe_rtl:mk_goto(FalseLab)]
@@ -512,28 +515,48 @@ unsafe_fixnum_sub(Arg1, Arg2, Res) ->
 
 %%% (16X+tag)+((16Y+tag)-tag) = 16X+tag+16Y = 16(X+Y)+tag
 %%% (16X+tag)-((16Y+tag)-tag) = 16X+tag-16Y = 16(X-Y)+tag
-fixnum_addsub(AluOp, Arg1, Arg2, Res, OtherLab) ->
-  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
+fixnum_addsub(AluOp, Arg1, Arg2, FinalRes, OtherLab) ->
+  NoOverflowLab = hipe_rtl:mk_new_label(),
   %% XXX: Consider moving this test to the users of fixnum_addsub.
-  case Arg1 =/= Res andalso Arg2 =/= Res of 
-    true -> 
-      %% Args differ from res.
-      NoOverflowLab = hipe_rtl:mk_new_label(),
-      [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
-       hipe_rtl:mk_alub(Res, Arg1, AluOp, Tmp, not_overflow,
-			hipe_rtl:label_name(NoOverflowLab), 
-			hipe_rtl:label_name(OtherLab), 0.99),
-       NoOverflowLab];
+  {Res, Tail} =
+    case Arg1 =/= FinalRes andalso Arg2 =/= FinalRes of
+      true ->
+	%% Neither argument is the same as FinalRes.
+	{FinalRes, [NoOverflowLab]};
+      false ->
+	%% At least one of the arguments is the same as FinalRes.
+	Tmp = hipe_rtl:mk_new_reg_gcsafe(),
+	{Tmp, [NoOverflowLab, hipe_rtl:mk_move(FinalRes, Tmp)]}
+    end,
+  case (hipe_rtl:is_imm(Arg1) andalso AluOp =:= 'add')
+    orelse hipe_rtl:is_imm(Arg2)
+  of
+    true ->
+      %% Pre-compute the untagged immediate. The optimisers won't do this for us
+      %% since they don't know that the untag never underflows.
+      {Var, Imm0} =
+	case hipe_rtl:is_imm(Arg2) of
+	  true  -> {Arg1, Arg2};
+	  false -> {Arg2, Arg1}
+	end,
+      Imm = hipe_rtl:mk_imm(hipe_rtl:imm_value(Imm0) - ?TAG_IMMED1_SMALL),
+      [hipe_rtl:mk_alub(Res, Var, AluOp, Imm, not_overflow,
+			hipe_rtl:label_name(NoOverflowLab),
+			hipe_rtl:label_name(OtherLab), 0.99)
+       |Tail];
     false ->
-      %% At least one of the arguments is the same as Res.
-      Tmp2 = hipe_rtl:mk_new_var(), % XXX: shouldn't this var be a reg?
-      NoOverflowLab = hipe_rtl:mk_new_label(),
-      [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
-       hipe_rtl:mk_alub(Tmp2, Arg1, AluOp, Tmp, not_overflow,
+      %% Commute add to save a move on x86
+      {UntagFirst, Lhs, Rhs} =
+	case AluOp of
+	  'add' -> {Arg1, Res, Arg2};
+	  'sub' -> {Arg2, Arg1, Res}
+	end,
+      [hipe_rtl:mk_alu(Res, UntagFirst, sub,
+		       hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
+       hipe_rtl:mk_alub(Res, Lhs, AluOp, Rhs, not_overflow,
 			hipe_rtl:label_name(NoOverflowLab), 
-			hipe_rtl:label_name(OtherLab), 0.99),
-       NoOverflowLab,
-       hipe_rtl:mk_move(Res, Tmp2)]
+			hipe_rtl:label_name(OtherLab), 0.99)
+       |Tail]
   end.
 
 %%% ((16X+tag) div 16) * ((16Y+tag)-tag) + tag = X*16Y+tag = 16(XY)+tag
@@ -687,7 +710,6 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
   IndexOkLab = hipe_rtl:mk_new_label(),
   Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple
   Header = hipe_rtl:mk_new_reg_gcsafe(),
-  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
   UIndex = hipe_rtl:mk_new_reg_gcsafe(),
   Arity = hipe_rtl:mk_new_reg_gcsafe(),
   InvIndex = hipe_rtl:mk_new_reg_gcsafe(),
@@ -700,9 +722,9 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
        BoxedOkLab,
        hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
        hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
-       hipe_rtl:mk_alub(Tmp, Header, 'and', 
-			hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
-			hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
+       hipe_rtl:mk_branch(Header, 'and',
+			  hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
+			  hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
        TupleOkLab,
        untag_fixnum(UIndex, Index),
        hipe_rtl:mk_alu(Arity, Header, 'srl',
@@ -716,9 +738,9 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
        BoxedOkLab,
        hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
        hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
-       hipe_rtl:mk_alub(Tmp, Header, 'and', 
-			hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
-			hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
+       hipe_rtl:mk_branch(Header, 'and',
+			  hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
+			  hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
        TupleOkLab,
        hipe_rtl:mk_alu(Arity, Header, 'srl', 
 		       hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
@@ -734,9 +756,9 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
        BoxedOkLab,
        hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
        hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
-       hipe_rtl:mk_alub(Tmp, Header, 'and', 
-			hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
-			hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
+       hipe_rtl:mk_branch(Header, 'and',
+			  hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
+			  hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
        TupleOkLab,
        untag_fixnum(UIndex, Index),
        hipe_rtl:mk_alu(Arity, Header, 'srl',
@@ -870,12 +892,10 @@ heap_arch_spec(HP) ->
    hipe_rtl_arch:pcb_store(?P_OFF_HEAP_FIRST, HP)].
 
 test_heap_binary(Binary, TrueLblName, FalseLblName) ->
-  Tmp1 = hipe_rtl:mk_new_reg_gcsafe(),
-  Tmp2 = hipe_rtl:mk_new_reg_gcsafe(),
-  [get_header(Tmp1, Binary),
-   hipe_rtl:mk_alu(Tmp2, Tmp1, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)),
-   hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(?TAG_HEADER_HEAP_BIN), 
-		      TrueLblName, FalseLblName)].
+  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
+  [get_header(Tmp, Binary),
+   mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_HEAP_BIN,
+		    TrueLblName, FalseLblName, 0.5)].
 
 mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, Orig) -> 
   mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, 
@@ -903,11 +923,10 @@ build_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs,
    set_field_from_term({sub_binary, orig}, Dst, Orig)].
 
 test_subbinary(Binary, TrueLblName, FalseLblName) ->
-  Tmp1 = hipe_rtl:mk_new_reg_gcsafe(),
-  Tmp2 = hipe_rtl:mk_new_reg_gcsafe(),
-  [get_header(Tmp1, Binary),
-   hipe_rtl:mk_alu(Tmp2, Tmp1, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)),
-   hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(?TAG_HEADER_SUB_BIN), TrueLblName, FalseLblName)].
+  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
+  [get_header(Tmp, Binary),
+   mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_SUB_BIN,
+		    TrueLblName, FalseLblName, 0.5)].
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%
diff --git a/lib/hipe/sparc/hipe_rtl_to_sparc.erl b/lib/hipe/sparc/hipe_rtl_to_sparc.erl
index e170fec3d6..7fab0d95c7 100644
--- a/lib/hipe/sparc/hipe_rtl_to_sparc.erl
+++ b/lib/hipe/sparc/hipe_rtl_to_sparc.erl
@@ -63,7 +63,6 @@ conv_insn(I, Map, Data) ->
   case I of
     #alu{} -> conv_alu(I, Map, Data);
     #alub{} -> conv_alub(I, Map, Data);
-    #branch{} -> conv_branch(I, Map, Data);
     #call{} -> conv_call(I, Map, Data);
     #comment{} -> conv_comment(I, Map, Data);
     #enter{} -> conv_enter(I, Map, Data);
@@ -281,7 +280,12 @@ mk_alu_rs(XAluOp, Src1, Src2, Dst) ->
 
 conv_alub(I, Map, Data) ->
   %% dst = src1 aluop src2; if COND goto label
-  {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
+  HasDst = hipe_rtl:alub_has_dst(I),
+  {Dst, Map0} =
+    case HasDst of
+      false -> {hipe_sparc:mk_g0(), Map};
+      true -> conv_dst(hipe_rtl:alub_dst(I), Map)
+    end,
   {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
   {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
   Cond = conv_cond(hipe_rtl:alub_cond(I)),
@@ -307,67 +311,33 @@ conv_alub(I, Map, Data) ->
 	I1 ++
 	[hipe_sparc:mk_rdy(TmpHi),
 	 hipe_sparc:mk_alu('sra', Dst, hipe_sparc:mk_uimm5(31), TmpSign) |
-	 conv_alub2(G0, TmpSign, 'sub', NewCond, TmpHi, I)];
+	 conv_alub2(G0, TmpSign, 'cmpcc', NewCond, TmpHi, I)];
       _ ->
-	conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I)
+	XAluOp =
+	  case (not HasDst) andalso RtlAlubOp =:= 'sub' of
+	    true -> 'cmpcc'; % == a subcc that commutes
+	    false -> conv_alubop_cc(RtlAlubOp)
+	  end,
+	conv_alub2(Dst, Src1, XAluOp, Cond, Src2, I)
     end,
   {I2, Map2, Data}.
 
--ifdef(notdef).	% XXX: only for sparc64, alas
-conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) ->
-  case conv_cond_rcond(Cond) of
-    [] ->
-      conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I);
-    RCond ->
-      conv_alub_br(Dst, Src1, RtlAlubOp, RCond, Src2, I)
-  end.
+conv_alub2(Dst, Src1, XAluOp, Cond, Src2, I) ->
+  conv_alub_bp(Dst, Src1, XAluOp, Cond, Src2, I).
 
-conv_alub_br(Dst, Src1, RtlAlubOp, RCond, Src2, I) ->
-  TrueLab = hipe_rtl:alub_true_label(I),
-  FalseLab = hipe_rtl:alub_false_label(I),
-  Pred = hipe_rtl:alub_pred(I),
-  %% "Dst = Src1 AluOp Src2; if COND" becomes
-  %% "Dst = Src1 AluOp Src2; if-COND(Dst)"
-  {I2, _DidCommute} = mk_alu(conv_alubop_nocc(RtlAlubOp), Src1, Src2, Dst),
-  I2 ++ mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred).
-
-conv_cond_rcond(Cond) ->
-  case Cond of
-    'e'  -> 'z';
-    'ne' -> 'nz';
-    'g'  -> 'gz';
-    'ge' -> 'gez';
-    'l'  -> 'lz';
-    'le' -> 'lez';
-    _	 -> []	% vs, vc, gu, geu, lu, leu
-  end.
-
-conv_alubop_nocc(RtlAlubOp) ->
-  case RtlAlubOp of
-    'add' -> 'add';
-    'sub' -> 'sub';
-    %% mul: handled elsewhere
-    'or' -> 'or';
-    'and' -> 'and';
-    'xor' -> 'xor'
-    %% no shift ops
-  end.
-
-mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred) ->
-  [hipe_sparc:mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred)].
--else.
-conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) ->
-  conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I).
--endif.
-
-conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I) ->
+conv_alub_bp(Dst, Src1, XAluOp, Cond, Src2, I) ->
   TrueLab = hipe_rtl:alub_true_label(I),
   FalseLab = hipe_rtl:alub_false_label(I),
   Pred = hipe_rtl:alub_pred(I),
   %% "Dst = Src1 AluOp Src2; if COND" becomes
   %% "Dst = Src1 AluOpCC Src22; if-COND(CC)"
-  {I2, _DidCommute} = mk_alu(conv_alubop_cc(RtlAlubOp), Src1, Src2, Dst),
-  I2 ++ mk_pseudo_bp(Cond, TrueLab, FalseLab, Pred).
+  {I2, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst),
+  NewCond =
+    case DidCommute andalso XAluOp =:= 'cmpcc' of
+      true -> commute_cond(Cond); % subcc does not commute; its conditions do
+      false -> Cond
+    end,
+  I2 ++ mk_pseudo_bp(NewCond, TrueLab, FalseLab, Pred).
 
 conv_alubop_cc(RtlAlubOp) ->
   case RtlAlubOp of
@@ -380,69 +350,6 @@ conv_alubop_cc(RtlAlubOp) ->
     %% no shift ops
   end.
 
-conv_branch(I, Map, Data) ->
-  %% <unused> = src1 - src2; if COND goto label
-  {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
-  {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
-  Cond = conv_cond(hipe_rtl:branch_cond(I)),
-  I2 = conv_branch2(Src1, Cond, Src2, I),
-  {I2, Map1, Data}.
-
--ifdef(notdef).	% XXX: only for sparc64, alas
-conv_branch2(Src1, Cond, Src2, I) ->
-  case conv_cond_rcond(Cond) of
-    [] ->
-      conv_branch_bp(Src1, Cond, Src2, I);
-    RCond ->
-      conv_branch_br(Src1, RCond, Src2, I)
-  end.
-
-conv_branch_br(Src1, RCond, Src2, I) ->
-  TrueLab = hipe_rtl:branch_true_label(I),
-  FalseLab = hipe_rtl:branch_false_label(I),
-  Pred = hipe_rtl:branch_pred(I),
-  %% "if src1-COND-src2" becomes
-  %% "sub src1,src2,tmp; if-COND(tmp)"
-  Dst = hipe_sparc:mk_new_temp('untagged'),
-  XAluOp = 'cmp',	% == a sub that commutes
-  {I1, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst),
-  NewRCond =
-    case DidCommute of
-      true -> commute_rcond(RCond);
-      false -> RCond
-    end,
-  I1 ++ mk_pseudo_br(NewRCond, Dst, TrueLab, FalseLab, Pred).
-
-commute_rcond(RCond) ->	% if x RCond y, then y commute_rcond(RCond) x
-  case RCond of
-    'z'   -> 'z';	% ==, ==
-    'nz'  -> 'nz';	% !=, !=
-    'gz'  -> 'lz';	% >, <
-    'gez' -> 'lez';	% >=, <=
-    'lz'  -> 'gz';	% <, >
-    'lez' -> 'gez'	% <=, >=
-  end.
--else.
-conv_branch2(Src1, Cond, Src2, I) ->
-  conv_branch_bp(Src1, Cond, Src2, I).
--endif.
-
-conv_branch_bp(Src1, Cond, Src2, I) ->
-  TrueLab = hipe_rtl:branch_true_label(I),
-  FalseLab = hipe_rtl:branch_false_label(I),
-  Pred = hipe_rtl:branch_pred(I),
-  %% "if src1-COND-src2" becomes
-  %% "subcc src1,src2,%g0; if-COND(CC)"
-  Dst = hipe_sparc:mk_g0(),
-  XAluOp = 'cmpcc',	% == a subcc that commutes
-  {I1, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst),
-  NewCond =
-    case DidCommute of
-      true -> commute_cond(Cond);
-      false -> Cond
-    end,
-  I1 ++ mk_pseudo_bp(NewCond, TrueLab, FalseLab, Pred).
-
 conv_call(I, Map, Data) ->
   {Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map),
   {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0),
diff --git a/lib/hipe/sparc/hipe_sparc_frame.erl b/lib/hipe/sparc/hipe_sparc_frame.erl
index 37f29e660a..bd94d3318c 100644
--- a/lib/hipe/sparc/hipe_sparc_frame.erl
+++ b/lib/hipe/sparc/hipe_sparc_frame.erl
@@ -110,7 +110,10 @@ do_pseudo_move(I, Context, FPoff) ->
 	  Offset = pseudo_offset(Src, FPoff, Context),
 	  mk_load(hipe_sparc:mk_sp(), Offset, Dst, []);
 	_ ->
-	  [hipe_sparc:mk_mov(Src, Dst)]
+	  case hipe_sparc:temp_reg(Dst) =:= hipe_sparc:temp_reg(Src) of
+	    true -> [];
+	    false -> [hipe_sparc:mk_mov(Src, Dst)]
+	  end
       end
   end.
 
diff --git a/lib/hipe/ssa/hipe_ssa.inc b/lib/hipe/ssa/hipe_ssa.inc
index 83ab320306..b511bb6f25 100644
--- a/lib/hipe/ssa/hipe_ssa.inc
+++ b/lib/hipe/ssa/hipe_ssa.inc
@@ -1,4 +1,4 @@
-%% -*- erlang-indent-level: 2 -*-
+%% -*- mode: erlang; erlang-indent-level: 2 -*-
 %%
 %% %CopyrightBegin%
 %% 
@@ -943,9 +943,9 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) ->
     false ->
       do_code(Instrs, LiveIn, Changed, [Instr|Acc]);
     true ->
-      case ?CODE:is_safe(Instr) of
+      case ?CODE:is_call(Instr) of
 	true ->
-	  case ?CODE:is_call(Instr) of
+	  case ?CODE:is_safe(Instr) of
 	    true ->
 	      case ?CODE:call_continuation(Instr) of
 		[] ->
@@ -955,11 +955,6 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) ->
 		  do_code(Instrs, LiveOut, true, [NewInstr|Acc])
 	      end;
 	    false ->
-	      do_code(Instrs, LiveOut, true, Acc)
-	  end;
-	false -> %% not a safe instruction - cannot be removed
-	  case ?CODE:is_call(Instr) of
-	    true ->
 	      case ?CODE:call_dstlist(Instr) of
 	        [] ->  %% result was not used anyway; no change
 		  do_code(Instrs, LiveIn, Changed, [Instr|Acc]);
@@ -968,9 +963,14 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) ->
 		  do_code(Instrs, LiveIn, true, [NewInstr|Acc]);
 		[_|_] ->  %% calls with multiple dests are left untouched
 		  do_code(Instrs, LiveIn, Changed, [Instr|Acc])
-	      end;
-	    false ->
-	      do_code(Instrs, LiveIn, Changed, [Instr|Acc])
+	      end
+	  end;
+	false ->
+	  case ?CODE:reduce_unused(Instr) of
+	    false -> % not a safe instruction - cannot be removed
+	      do_code(Instrs, LiveIn, Changed, [Instr|Acc]);
+	    Replacement ->
+	      do_code(lists:reverse(Replacement, Instrs), LiveOut, true, Acc)
 	  end
       end
   end;
diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl
index 4c8c98551c..851b7da2dd 100644
--- a/lib/hipe/x86/hipe_rtl_to_x86.erl
+++ b/lib/hipe/x86/hipe_rtl_to_x86.erl
@@ -91,26 +91,31 @@ conv_insn(I, Map, Data) ->
     #alub{} ->
       %% dst = src1 op src2; if COND goto label
       BinOp = conv_binop(hipe_rtl:alub_op(I)),
-      {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
-      {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
-      {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
+      {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map),
+      {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0),
       Cc = conv_cond(hipe_rtl:alub_cond(I)),
-      I1 = [hipe_x86:mk_pseudo_jcc(Cc,
-				   hipe_rtl:alub_true_label(I),
-				   hipe_rtl:alub_false_label(I),
-				   hipe_rtl:alub_pred(I))],
-      I2 = conv_alu(Dst, Src1, BinOp, Src2, I1),
-      {FixSrc1++FixSrc2++I2, Map2, Data};
-    #branch{} ->
-      %% <unused> = src1 - src2; if COND goto label
-      {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
-      {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
-      Cc = conv_cond(hipe_rtl:branch_cond(I)),
-      I2 = conv_branch(Src1, Cc, Src2,
-		       hipe_rtl:branch_true_label(I),
-		       hipe_rtl:branch_false_label(I),
-		       hipe_rtl:branch_pred(I)),
-      {FixSrc1++FixSrc2++I2, Map1, Data};
+      BranchOp = conv_branchop(BinOp),
+      HasDst = hipe_rtl:alub_has_dst(I),
+      {I2, Map3} =
+	case (not HasDst) andalso BranchOp =/= none of
+	  true ->
+	    {conv_branch(Src1, BranchOp, Src2, Cc,
+			 hipe_rtl:alub_true_label(I),
+			 hipe_rtl:alub_false_label(I),
+			 hipe_rtl:alub_pred(I)), Map1};
+	  false ->
+	    {Dst, Map2} =
+	      case HasDst of
+		false -> {new_untagged_temp(), Map1};
+		true -> conv_dst(hipe_rtl:alub_dst(I), Map1)
+	      end,
+	    I1 = [hipe_x86:mk_pseudo_jcc(Cc,
+					 hipe_rtl:alub_true_label(I),
+					 hipe_rtl:alub_false_label(I),
+					 hipe_rtl:alub_pred(I))],
+	    {conv_alu(Dst, Src1, BinOp, Src2, I1), Map2}
+	end,
+      {FixSrc1++FixSrc2++I2, Map3, Data};
     #call{} ->
       %%	push <arg1>
       %%	...
@@ -252,7 +257,9 @@ conv_insn(I, Map, Data) ->
 
 conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) ->
   case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2))
-    andalso (hipe_x86:is_temp(Src1) orelse hipe_x86:is_temp(Src2))
+    %% We could use orelse instead of xor here to generate lea T1(T2), T3, but
+    %% two-temp adds seem to move-coalesce so well that move+add is better.
+    andalso (hipe_x86:is_temp(Src1) xor hipe_x86:is_temp(Src2))
   of
     false -> conv_alu(Dst, Src1, 'add', Src2, Tail);
     true -> % Use LEA
@@ -263,6 +270,16 @@ conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) ->
 	    end,
       [hipe_x86:mk_lea(Mem, Dst) | Tail]
   end;
+conv_alu_nocc(Dst, Src1, 'sub', Src2, Tail) ->
+  case (not same_opnd(Dst, Src1)) andalso hipe_x86:is_temp(Src1)
+    andalso (not hipe_x86:is_temp(Src2))
+  of
+    false -> conv_alu(Dst, Src1, 'sub', Src2, Tail);
+    true -> % Use LEA
+      Imm = hipe_x86:mk_imm(-hipe_x86:imm_value(Src2)),
+      Mem = hipe_x86:mk_mem(Src1, Imm, typeof_dst(Dst)),
+      [hipe_x86:mk_lea(Mem, Dst) | Tail]
+  end;
 conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) ->
   conv_alu(Dst, Src1, BinOp, Src2, Tail).
 
@@ -360,28 +377,41 @@ conv_shift(Dst, Src1, BinOp, Src2) ->
 %%% Finalise the conversion of a conditional branch operation, taking
 %%% care to not introduce more temps and moves than necessary.
 
-conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
+conv_branchop('sub') -> 'cmp';
+conv_branchop('and') ->  'test';
+conv_branchop(_) -> none.
+
+branchop_commutes('cmp') -> false;
+branchop_commutes('test') -> true.
+
+conv_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) ->
   case hipe_x86:is_imm(Src1) of
     false ->
-      mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred);
+      mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred);
     true ->
       case hipe_x86:is_imm(Src2) of
 	false ->
-	  NewCc = commute_cc(Cc),
-	  mk_branch(Src2, NewCc, Src1, TrueLab, FalseLab, Pred);
+	  NewCc = case branchop_commutes(Op) of
+		    true -> Cc;
+		    false -> commute_cc(Cc)
+		  end,
+	  mk_branch(Src2, Op, Src1, NewCc, TrueLab, FalseLab, Pred);
 	true ->
 	  %% two immediates, let the optimiser clean it up
 	  Tmp = new_untagged_temp(),
 	  [hipe_x86:mk_move(Src1, Tmp) |
-	   mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)]
+	   mk_branch(Tmp, Op, Src2, Cc, TrueLab, FalseLab, Pred)]
       end
   end.
 
-mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
+mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) ->
   %% PRE: not(is_imm(Src1))
-  [hipe_x86:mk_cmp(Src2, Src1),
+  [mk_branchtest(Src1, Op, Src2),
    hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)].
 
+mk_branchtest(Src1, cmp, Src2) -> hipe_x86:mk_cmp(Src2, Src1);
+mk_branchtest(Src1, test, Src2) -> hipe_x86:mk_test(Src2, Src1).
+
 %%% Convert an RTL ALU or ALUB binary operator.
 
 conv_binop(BinOp) ->
diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl
index 33d7f77cf1..45bf1ad736 100644
--- a/lib/hipe/x86/hipe_x86.erl
+++ b/lib/hipe/x86/hipe_x86.erl
@@ -37,7 +37,7 @@
 	 mk_imm_from_addr/2,
 	 mk_imm_from_atom/1,
 	 is_imm/1,
-	 %% imm_value/1,
+	 imm_value/1,
 
 	 mk_mem/3,
 	 %% is_mem/1,
@@ -201,7 +201,7 @@
 	 shift_src/1,
 	 shift_dst/1,
 
-	 %% mk_test/2,
+	 mk_test/2,
 	 test_src/1,
 	 test_dst/1,
 
@@ -218,6 +218,10 @@
 	 %% highest_temp/1
 	]).
 
+%% Other utilities
+-export([neg_cc/1
+	]).
+
 %%%
 %%% Low-level accessors.
 %%%
@@ -241,7 +245,7 @@ mk_imm_from_addr(Addr, Type) ->
 mk_imm_from_atom(Atom) ->
     mk_imm(Atom).
 is_imm(X) -> case X of #x86_imm{} -> true; _ -> false end.
-%% imm_value(#x86_imm{value=Value}) -> Value.
+imm_value(#x86_imm{value=Value}) -> Value.
 
 mk_mem(Base, Off, Type) -> #x86_mem{base=Base, off=Off, type=Type}.
 %% is_mem(X) -> case X of #x86_mem{} -> true; _ -> false end.
@@ -305,7 +309,7 @@ mk_cmp(Src, Dst) -> #cmp{src=Src, dst=Dst}.
 cmp_src(#cmp{src=Src}) -> Src.
 cmp_dst(#cmp{dst=Dst}) -> Dst.
 
-%% mk_test(Src, Dst) -> #test{src=Src, dst=Dst}.
+mk_test(Src, Dst) -> #test{src=Src, dst=Dst}.
 test_src(#test{src=Src}) -> Src.
 test_dst(#test{dst=Dst}) -> Dst.
 
diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl
index e21223a5b1..e692ff0ebb 100644
--- a/lib/hipe/x86/hipe_x86_assemble.erl
+++ b/lib/hipe/x86/hipe_x86_assemble.erl
@@ -599,10 +599,20 @@ temp_to_xmm(#x86_temp{reg=Reg}) ->
   {xmm, Reg}. 
 
 -ifdef(HIPE_AMD64).
+temp_to_rm8(#x86_temp{reg=Reg}) ->
+  {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
 temp_to_rm64(#x86_temp{reg=Reg}) ->
   {rm64, hipe_amd64_encode:rm_reg(Reg)}.
+-else.
+temp_to_rm8(#x86_temp{reg=Reg}) ->
+  true = ?HIPE_X86_ENCODE:reg_has_8bit(Reg),
+  {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
+temp_to_rm16(#x86_temp{reg=Reg}) ->
+  {rm16, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
 -endif.
 
+temp_to_rm32(#x86_temp{reg=Reg}) ->
+  {rm32, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
 temp_to_rmArch(#x86_temp{reg=Reg}) ->
   {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
 temp_to_rm64fp(#x86_temp{reg=Reg}) ->
@@ -878,15 +888,29 @@ resolve_alu_args(Src, Dst, Context) ->
 %%% test
 resolve_test_args(Src, Dst, Context) ->
   case Src of
-    #x86_imm{} -> % imm8 not allowed
-      {_ImmSize,ImmValue} = translate_imm(Src, Context, false),
-      NewDst =
-	case Dst of
-	  #x86_temp{reg=0} -> ?EAX;
-	  #x86_temp{} -> temp_to_rmArch(Dst);
-	  #x86_mem{} -> mem_to_rmArch(Dst)
-	end,
-      {NewDst, {imm32,ImmValue}};
+    %% Since we're using an 8-bit instruction, the immediate is not sign
+    %% extended. Thus, we can use immediates up to 255.
+    #x86_imm{value=ImmVal}
+      when is_integer(ImmVal), ImmVal >= 0, ImmVal =< 255 ->
+      Imm = {imm8, ImmVal},
+      case Dst of
+	#x86_temp{reg=0} -> {al, Imm};
+	#x86_temp{} -> resolve_test_imm8_reg(Imm, Dst);
+	#x86_mem{} -> {mem_to_rm8(Dst), Imm}
+      end;
+    #x86_imm{value=ImmVal} when is_integer(ImmVal), ImmVal >= 0 ->
+      {case Dst of
+	 #x86_temp{reg=0} -> eax;
+	 #x86_temp{} -> temp_to_rm32(Dst);
+	 #x86_mem{} -> mem_to_rm32(Dst)
+       end, {imm32, ImmVal}};
+    #x86_imm{} -> % Negative or non-integer ImmVal; use word-sized instr, imm32
+      {_, ImmVal} = translate_imm(Src, Context, false),
+      {case Dst of
+	 #x86_temp{reg=0} -> ?EAX;
+	 #x86_temp{} -> temp_to_rmArch(Dst);
+	 #x86_mem{} -> mem_to_rmArch(Dst)
+       end, {imm32, ImmVal}};
     #x86_temp{} ->
       NewDst =
 	case Dst of
@@ -896,6 +920,18 @@ resolve_test_args(Src, Dst, Context) ->
       {NewDst, temp_to_regArch(Src)}
   end.
 
+-ifdef(HIPE_AMD64).
+resolve_test_imm8_reg(Imm, Dst) -> {temp_to_rm8(Dst), Imm}.
+-else.
+resolve_test_imm8_reg(Imm = {imm8, ImmVal}, Dst = #x86_temp{reg=Reg}) ->
+  case ?HIPE_X86_ENCODE:reg_has_8bit(Reg) of
+    true -> {temp_to_rm8(Dst), Imm};
+    false ->
+      %% Register does not exist in 8-bit version; use 16-bit instead
+      {temp_to_rm16(Dst), {imm16, ImmVal}}
+  end.
+-endif.
+
 %%% shifts
 resolve_shift_args(Src, Dst, Context) ->
   RM32 =
diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl
index 4455def74e..ab26370a80 100644
--- a/lib/hipe/x86/hipe_x86_defuse.erl
+++ b/lib/hipe/x86/hipe_x86_defuse.erl
@@ -60,7 +60,7 @@ insn_def(I) ->
     #pseudo_tailcall_prepare{} -> tailcall_clobbered();
     #shift{dst=Dst} -> dst_def(Dst);
     %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label
-    %% pseudo_jcc, pseudo_tailcall, push, ret
+    %% pseudo_jcc, pseudo_tailcall, push, ret, test
     _ -> []
   end.
 
@@ -120,6 +120,7 @@ insn_use(I) ->
     #push{src=Src} -> addtemp(Src, []);
     #ret{} -> [hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?RV(), 'tagged')];
     #shift{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, []));
+    #test{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, []));
     %% comment, jcc, jmp_label, label, pseudo_jcc, pseudo_tailcall_prepare
     _ -> []
   end.
diff --git a/lib/hipe/x86/hipe_x86_encode.erl b/lib/hipe/x86/hipe_x86_encode.erl
index 3b7be86608..2d1663d0d6 100644
--- a/lib/hipe/x86/hipe_x86_encode.erl
+++ b/lib/hipe/x86/hipe_x86_encode.erl
@@ -65,6 +65,7 @@
 	 cc/1,
 	 % 8-bit registers
 	 %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0,
+	 reg_has_8bit/1,
 	 % 32-bit registers
 	 %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0,
 	 % operands
@@ -143,6 +144,8 @@ cc(g) -> ?CC_G.
 %% dh() -> ?DH.
 %% bh() -> ?BH.
 
+reg_has_8bit(Reg) -> Reg =< ?BL.
+
 %%% 32-bit registers
 
 -define(EAX, 2#000).
@@ -700,8 +703,16 @@ shd_op_sizeof(Opnds) ->
 
 test_encode(Opnds) ->
     case Opnds of
+	{al, {imm8,Imm8}} ->
+	    [16#A8, Imm8];
+	{ax, {imm16,Imm16}} ->
+	    [?PFX_OPND, 16#A9 | le16(Imm16, [])];
 	{eax, {imm32,Imm32}} ->
 	    [16#A9 | le32(Imm32, [])];
+	{{rm8,RM8}, {imm8,Imm8}} ->
+	    [16#F6 | encode_rm(RM8, 2#000, [Imm8])];
+	{{rm16,RM16}, {imm16,Imm16}} ->
+	    [?PFX_OPND, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))];
 	{{rm32,RM32}, {imm32,Imm32}} ->
 	    [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))];
 	{{rm32,RM32}, {reg32,Reg32}} ->
@@ -710,8 +721,16 @@ test_encode(Opnds) ->
 
 test_sizeof(Opnds) ->
     case Opnds of
+	{al, {imm8,_}} ->
+	    1 + 1;
+	{ax, {imm16,_}} ->
+	    2 + 2;
 	{eax, {imm32,_}} ->
 	    1 + 4;
+	{{rm8,RM8}, {imm8,_}} ->
+	    1 + sizeof_rm(RM8) + 1;
+	{{rm16,RM16}, {imm16,_}} ->
+	    2 + sizeof_rm(RM16) + 2;
 	{{rm32,RM32}, {imm32,_}} ->
 	    1 + sizeof_rm(RM32) + 4;
 	{{rm32,RM32}, {reg32,_}} ->
@@ -1283,7 +1302,11 @@ dotest1(OS) ->
     t(OS,'sub',{RM32,Imm8}),
     t(OS,'sub',{RM32,Reg32}),
     t(OS,'sub',{Reg32,RM32}),
+    t(OS,'test',{al,Imm8}),
+    t(OS,'test',{ax,Imm16}),
     t(OS,'test',{eax,Imm32}),
+    t(OS,'test',{RM8,Imm8}),
+    t(OS,'test',{RM16,Imm16}),
     t(OS,'test',{RM32,Imm32}),
     t(OS,'test',{RM32,Reg32}),
     t(OS,'xor',{eax,Imm32}),
diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl
index fc782571bf..17253ad46f 100644
--- a/lib/hipe/x86/hipe_x86_frame.erl
+++ b/lib/hipe/x86/hipe_x86_frame.erl
@@ -116,6 +116,8 @@ do_insn(I, LiveOut, Context, FPoff) ->
       {do_ret(I, Context, FPoff), context_framesize(Context)};
     #shift{} ->
       {[do_shift(I, Context, FPoff)], FPoff};
+    #test{} ->
+      {[do_test(I, Context, FPoff)], FPoff};
     _ ->	% comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare
       {[I], FPoff}
   end.
@@ -188,6 +190,12 @@ do_shift(I, Context, FPoff) ->
   Dst = conv_opnd(Dst0, FPoff, Context),
   I#shift{src=Src,dst=Dst}.
 
+do_test(I, Context, FPoff) ->
+  #test{src=Src0,dst=Dst0} = I,
+  Src = conv_opnd(Src0, FPoff, Context),
+  Dst = conv_opnd(Dst0, FPoff, Context),
+  I#test{src=Src,dst=Dst}.
+
 conv_opnd(Opnd, FPoff, Context) ->
   case opnd_is_pseudo(Opnd) of
     false ->
diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl
index 4515822a34..f88a841cca 100644
--- a/lib/hipe/x86/hipe_x86_postpass.erl
+++ b/lib/hipe/x86/hipe_x86_postpass.erl
@@ -120,19 +120,15 @@ peep([#move{src=Src1, dst=Dst},
 
 %% ElimCmp0
 %% --------
-peep([C=#cmp{src=Src, dst=Dst},J=#jcc{cc=Cond, label=Lab}|Insns],Res,Lst) ->
-    case (((Src =:= #x86_imm{value=0}) or (Dst =:= #x86_imm{value=0})) and
-	  ((Cond =:= 'eq') or (Cond =:= 'neq'))) of
-	true ->
-	    Src2 = case Src of #x86_imm{value=0} -> Src; _ -> Dst end, 
-	    Cond2 = case Cond of 'eq' -> 'z'; 'neq' -> 'nz' end,
-	    Test = #test{src=Src2, dst=#x86_imm{value=0}},
-	    Jump = #jcc{cc=Cond2, label=Lab},
-	    peep(Insns, [Jump, Test|Res], [elimCmp0|Lst]);
-	_ ->
-	    peep(Insns, [J,C|Res], Lst)
-    end;
-
+peep([#cmp{src=#x86_imm{value=0}, dst=Dst=#x86_temp{}}|Insns],Res,Lst) ->
+  %% TEST leaves the adjust flag undefined, whereas CMP sets it properly (in
+  %% this case to 0). However, since HiPE does not use any instructions that
+  %% read the adjust flag, we can do this transform safely.
+  peep(Insns, [#test{src=Dst, dst=Dst} | Res], [elimCmp0_1|Lst]);
+peep([#cmp{src=Src=#x86_temp{}, dst=#x86_imm{value=0}},
+      J=#jcc{cc=Cond}|Insns],Res,Lst)
+  when Cond =:= 'e'; Cond =:= 'ne' -> % We're commuting the comparison
+  peep(Insns, [J, #test{src=Src, dst=Src} | Res], [elimCmp0_2|Lst]);
 
 %% ElimCmpTest
 %% -----------
@@ -187,6 +183,18 @@ peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) ->
 	    peep(Insns, [B|Res], Lst)
     end;
 
+%% LeaToAdd
+%% This rule transforms lea into add when the destination is the same as one of
+%% the operands. Sound because lea is never used where the condition codes are
+%% live (and would be clobbered by add).
+%% ----------
+peep([#lea{mem=#x86_mem{base=#x86_temp{reg=DstR},off=Src},
+	   temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) ->
+     peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]);
+peep([#lea{mem=#x86_mem{base=Src,off=#x86_temp{reg=DstR}},
+	   temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) ->
+     peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]);
+
 %% SubToDec
 %% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which
 %% changes reduction counter tests to use decl instead of subl.
@@ -209,6 +217,11 @@ trivial_goto_elimination(Insns) -> goto_elim(Insns, []).
 
 goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) ->
   goto_elim([I|Insns], Res);
+goto_elim([#jcc{cc=CC, label=Label} = IJCC,
+	   #jmp_label{label=BranchTgt},
+	   #label{label=Label} = ILBL|Insns], Res) ->
+  goto_elim([IJCC#jcc{cc=hipe_x86:neg_cc(CC), label=BranchTgt},
+	     ILBL|Insns], Res);
 goto_elim([I | Insns], Res) ->
   goto_elim(Insns, [I|Res]);
 goto_elim([], Res) ->
diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl
index ff26a31877..942201a051 100644
--- a/lib/hipe/x86/hipe_x86_pp.erl
+++ b/lib/hipe/x86/hipe_x86_pp.erl
@@ -188,6 +188,12 @@ pp_insn(Dev, I, Pre) ->
       io:format(Dev, ", ", []),
       pp_dst(Dev, Dst),
       io:format(Dev, "\n", []);
+    #test{src=Src, dst=Dst} ->
+      io:format(Dev, "\ttest ", []),
+      pp_src(Dev, Src),
+      io:format(Dev, ", ", []),
+      pp_dst(Dev, Dst),
+      io:format(Dev, "\n", []);
     #fp_binop{src=Src, dst=Dst, op=Op} ->
       io:format(Dev, "\t~s ", [Op]),
       pp_dst(Dev, Dst),
diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl
index edfd7b332c..1fd617570a 100644
--- a/lib/hipe/x86/hipe_x86_ra_finalise.erl
+++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl
@@ -162,6 +162,10 @@ ra_insn(I, Map, FpMap) ->
       Src = ra_opnd(Src0, Map),
       Dst = ra_opnd(Dst0, Map),
       I#shift{src=Src,dst=Dst};
+    #test{src=Src0,dst=Dst0} ->
+      Src = ra_opnd(Src0, Map),
+      Dst = ra_opnd(Dst0, Map),
+      I#test{src=Src,dst=Dst};
     _ ->
       exit({?MODULE,ra_insn,I})
   end.
diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl
index 35de692e07..9371e4b1a5 100644
--- a/lib/hipe/x86/hipe_x86_ra_naive.erl
+++ b/lib/hipe/x86/hipe_x86_ra_naive.erl
@@ -100,6 +100,8 @@ do_insn(I) ->	% Insn -> Insn list
       do_fp_binop(I);
     #shift{} ->
       do_shift(I);
+    #test{} ->
+      do_test(I);
     #label{} ->
       [I];
     #pseudo_jcc{} ->
@@ -310,6 +312,11 @@ do_shift(I) ->
       FixDst ++ [I#shift{dst=Dst}]
   end.
 
+do_test(I) ->
+  #test{src=Src0,dst=Dst0} = I,
+  {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0),
+  FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}].
+
 %%% Fix the operands of a binary op.
 %%% 1. remove pseudos from any explicit memory operands
 %%% 2. if both operands are (implicit or explicit) memory operands,
diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl
index f496b71828..e7c397b5b7 100644
--- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl
+++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl
@@ -83,6 +83,8 @@ do_insn(I, TempMap, Strategy) ->	% Insn -> {Insn list, DidSpill}
       do_fmove(I, TempMap, Strategy);
     #shift{} ->
       do_shift(I, TempMap, Strategy);
+    #test{} ->
+      do_test(I, TempMap, Strategy);
     _ ->
       %% comment, jmp*, label, pseudo_call, pseudo_jcc, pseudo_tailcall,
       %% pseudo_tailcall_prepare, push, ret
@@ -308,6 +310,14 @@ do_shift(I, TempMap, Strategy) ->
       {FixDst ++ [I#shift{dst=Dst}], DidSpill}
   end.
 
+%%% Fix a test op: both operands go through do_binary/4; its fix-ups come first.
+
+do_test(I, TempMap, Strategy) ->
+  #test{src=Src0,dst=Dst0} = I,
+  {FixSrc, Src, FixDst, Dst, DidSpill} =
+    do_binary(Src0, Dst0, TempMap, Strategy),
+  {FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}], DidSpill}.
+
 %%% Fix the operands of a binary op.
 %%% 1. remove pseudos from any explicit memory operands
 %%% 2. if both operands are (implicit or explicit) memory operands,
author	Sverker Eriksson <[email protected]>	2016-11-22 12:02:07 +0100
committer	Sverker Eriksson <[email protected]>	2016-11-22 12:02:07 +0100
commit	3d7b55f946162b5a129241dbe67397784a1ba1a5 (patch)
tree	8a3809296bdfcdd16ebbf78975ea18034b22d62c
parent	9491f6727f12e37241863bd5becbd1f336ff7659 (diff)
parent	fda3c9575d77bed0250f76f17e92d18836e15d0c (diff)
download	otp-3d7b55f946162b5a129241dbe67397784a1ba1a5.tar.gz otp-3d7b55f946162b5a129241dbe67397784a1ba1a5.tar.bz2 otp-3d7b55f946162b5a129241dbe67397784a1ba1a5.zip