From 3579a706ea0c0081d7dd01291990cd8d3669f195 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Wed, 9 Nov 2016 18:49:06 +0100
Subject: hipe_{x86,amd64}: Finish test instr implementation

---
 lib/hipe/amd64/hipe_amd64_encode.erl        | 38 ++++++++++++++++-------
 lib/hipe/x86/hipe_x86_assemble.erl          | 47 +++++++++++++++++++++++------
 lib/hipe/x86/hipe_x86_defuse.erl            |  3 +-
 lib/hipe/x86/hipe_x86_encode.erl            | 23 ++++++++++++++
 lib/hipe/x86/hipe_x86_frame.erl             |  8 +++++
 lib/hipe/x86/hipe_x86_pp.erl                |  6 ++++
 lib/hipe/x86/hipe_x86_ra_finalise.erl       |  4 +++
 lib/hipe/x86/hipe_x86_ra_naive.erl          |  7 +++++
 lib/hipe/x86/hipe_x86_ra_postconditions.erl | 10 ++++++
 9 files changed, 125 insertions(+), 21 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/amd64/hipe_amd64_encode.erl b/lib/hipe/amd64/hipe_amd64_encode.erl
index df15732cea..c41eaa3c6a 100644
--- a/lib/hipe/amd64/hipe_amd64_encode.erl
+++ b/lib/hipe/amd64/hipe_amd64_encode.erl
@@ -63,7 +63,7 @@
 -export([% condition codes
 	 cc/1,
 	 % 8-bit registers
-	 %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0,
+	 %% al/0, cl/0, dl/0, bl/0,
 	 % 32-bit registers
 	 %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0,
 	 % operands
@@ -127,19 +127,15 @@ cc(g) -> ?CC_G.
 -define(CL, 2#001).
 -define(DL, 2#010).
 -define(BL, 2#011).
--define(AH, 2#100).
--define(CH, 2#101).
--define(DH, 2#110).
--define(BH, 2#111).
+-define(SPL, 2#100).
+-define(BPL, 2#101).
+-define(SIL, 2#110).
+-define(DIL, 2#111).
 
 %% al() -> ?AL.
 %% cl() -> ?CL.
 %% dl() -> ?DL.
 %% bl() -> ?BL.
-%% ah() -> ?AH.
-%% ch() -> ?CH.
-%% dh() -> ?DH.
-%% bh() -> ?BH.
 
 %%% 32-bit registers
 
@@ -208,6 +204,7 @@ rex_([]) -> 0;
 rex_([{r8, Reg8}| Rest]) ->             % 8 bit registers
     case Reg8 of
 	{rm_mem, _} -> rex_(Rest);
+	{rm_reg, R} -> rex_([{r8, R} | Rest]);
 	4 -> (1 bsl 8) bor rex_(Rest);
 	5 -> (1 bsl 8) bor rex_(Rest);
 	6 -> (1 bsl 8) bor rex_(Rest);
@@ -825,12 +822,22 @@ shd_op_encode(Opcode, Opnds) ->
 
 test_encode(Opnds) ->
     case Opnds of
+	{al, {imm8,Imm8}} ->
+	    [16#A8, Imm8];
+	{ax, {imm16,Imm16}} ->
+	    [?PFX_OPND_16BITS, 16#A9 | le16(Imm16, [])];
 	{eax, {imm32,Imm32}} ->
 	    [16#A9 | le32(Imm32, [])];
+	{{rm8,RM8}, {imm8,Imm8}} ->
+	    [rex([{r8,RM8}]), 16#F6 | encode_rm(RM8, 2#000, [Imm8])];
+	{{rm16,RM16}, {imm16,Imm16}} ->
+	    [?PFX_OPND_16BITS, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))];
 	{{rm32,RM32}, {imm32,Imm32}} ->
 	    [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))];
 	{{rm32,RM32}, {reg32,Reg32}} ->
-	    [16#85 | encode_rm(RM32, Reg32, [])]
+	    [16#85 | encode_rm(RM32, Reg32, [])];
+	{{rm64,RM64}, {reg64,Reg64}} ->
+	    [rex([{w,1}]), 16#85 | encode_rm(RM64, Reg64, [])]
     end.
 
 %% test_sizeof(Opnds) ->
@@ -1309,18 +1316,21 @@ dotest1(OS) ->
     Imm32 = {imm32,Word32},
     Imm16 = {imm16,Word16},
     Imm8 = {imm8,Word8},
+    RM64 = {rm64,rm_reg(?EDX)},
     RM32 = {rm32,rm_reg(?EDX)},
     RM16 = {rm16,rm_reg(?EDX)},
     RM8 = {rm8,rm_reg(?EDX)},
+    RM8REX = {rm8,rm_reg(?SIL)},
     Rel32 = {rel32,Word32},
     Rel8 = {rel8,Word8},
     Moffs32 = {moffs32,Word32},
     Moffs16 = {moffs16,Word32},
     Moffs8 = {moffs8,Word32},
     CC = {cc,?CC_G},
+    Reg64 = {reg64,?EAX},
     Reg32 = {reg32,?EAX},
     Reg16 = {reg16,?EAX},
-    Reg8 = {reg8,?AH},
+    Reg8 = {reg8,?SPL},
     EA = {ea,ea_base(?ECX)},
     % exercise each instruction definition
     t(OS,'adc',{eax,Imm32}),
@@ -1465,9 +1475,15 @@ dotest1(OS) ->
     t(OS,'sub',{RM32,Imm8}),
     t(OS,'sub',{RM32,Reg32}),
     t(OS,'sub',{Reg32,RM32}),
+    t(OS,'test',{al,Imm8}),
+    t(OS,'test',{ax,Imm16}),
     t(OS,'test',{eax,Imm32}),
+    t(OS,'test',{RM8,Imm8}),
+    t(OS,'test',{RM8REX,Imm8}),
+    t(OS,'test',{RM16,Imm16}),
     t(OS,'test',{RM32,Imm32}),
     t(OS,'test',{RM32,Reg32}),
+    t(OS,'test',{RM64,Reg64}),
     t(OS,'xor',{eax,Imm32}),
     t(OS,'xor',{RM32,Imm32}),
     t(OS,'xor',{RM32,Imm8}),
diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl
index e21223a5b1..4986933f50 100644
--- a/lib/hipe/x86/hipe_x86_assemble.erl
+++ b/lib/hipe/x86/hipe_x86_assemble.erl
@@ -599,10 +599,20 @@ temp_to_xmm(#x86_temp{reg=Reg}) ->
   {xmm, Reg}. 
 
 -ifdef(HIPE_AMD64).
+temp_to_rm8(#x86_temp{reg=Reg}) ->
+  {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
 temp_to_rm64(#x86_temp{reg=Reg}) ->
   {rm64, hipe_amd64_encode:rm_reg(Reg)}.
+-else.
+temp_to_rm8(#x86_temp{reg=Reg}) ->
+  true = ?HIPE_X86_ENCODE:reg_has_8bit(Reg),
+  {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
+temp_to_rm16(#x86_temp{reg=Reg}) ->
+  {rm16, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
 -endif.
 
+temp_to_rm32(#x86_temp{reg=Reg}) ->
+  {rm32, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
 temp_to_rmArch(#x86_temp{reg=Reg}) ->
   {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
 temp_to_rm64fp(#x86_temp{reg=Reg}) ->
@@ -878,15 +888,22 @@ resolve_alu_args(Src, Dst, Context) ->
 %%% test
 resolve_test_args(Src, Dst, Context) ->
   case Src of
-    #x86_imm{} -> % imm8 not allowed
-      {_ImmSize,ImmValue} = translate_imm(Src, Context, false),
-      NewDst =
-	case Dst of
-	  #x86_temp{reg=0} -> ?EAX;
-	  #x86_temp{} -> temp_to_rmArch(Dst);
-	  #x86_mem{} -> mem_to_rmArch(Dst)
-	end,
-      {NewDst, {imm32,ImmValue}};
+    #x86_imm{} ->
+      Imm = translate_imm(Src, Context, false),
+      case Imm of
+	{imm8,_} ->
+	  case Dst of
+	    #x86_temp{reg=0} -> {al, Imm};
+	    #x86_temp{} -> resolve_test_imm8_reg(Imm, Dst);
+	    #x86_mem{} -> {mem_to_rm8(Dst), Imm}
+	  end;
+	{imm32,_} ->
+	  {case Dst of
+	     #x86_temp{reg=0} -> eax;
+	     #x86_temp{} -> temp_to_rm32(Dst);
+	     #x86_mem{} -> mem_to_rm32(Dst)
+	   end, Imm}
+      end;
     #x86_temp{} ->
       NewDst =
 	case Dst of
@@ -896,6 +913,18 @@ resolve_test_args(Src, Dst, Context) ->
       {NewDst, temp_to_regArch(Src)}
   end.
 
+-ifdef(HIPE_AMD64).
+resolve_test_imm8_reg(Imm, Dst) -> {temp_to_rm8(Dst), Imm}.
+-else.
+resolve_test_imm8_reg(Imm = {imm8, ImmVal}, Dst = #x86_temp{reg=Reg}) ->
+  case ?HIPE_X86_ENCODE:reg_has_8bit(Reg) of
+    true -> {temp_to_rm8(Dst), Imm};
+    false ->
+      %% Register does not exist in 8-bit version; use 16-bit instead
+      {temp_to_rm16(Dst), {imm16, ImmVal}}
+  end.
+-endif.
+
 %%% shifts
 resolve_shift_args(Src, Dst, Context) ->
   RM32 =
diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl
index 4455def74e..ab26370a80 100644
--- a/lib/hipe/x86/hipe_x86_defuse.erl
+++ b/lib/hipe/x86/hipe_x86_defuse.erl
@@ -60,7 +60,7 @@ insn_def(I) ->
     #pseudo_tailcall_prepare{} -> tailcall_clobbered();
     #shift{dst=Dst} -> dst_def(Dst);
     %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label
-    %% pseudo_jcc, pseudo_tailcall, push, ret
+    %% pseudo_jcc, pseudo_tailcall, push, ret, test
     _ -> []
   end.
 
@@ -120,6 +120,7 @@ insn_use(I) ->
     #push{src=Src} -> addtemp(Src, []);
     #ret{} -> [hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?RV(), 'tagged')];
     #shift{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, []));
+    #test{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, []));
     %% comment, jcc, jmp_label, label, pseudo_jcc, pseudo_tailcall_prepare
     _ -> []
   end.
diff --git a/lib/hipe/x86/hipe_x86_encode.erl b/lib/hipe/x86/hipe_x86_encode.erl
index 3b7be86608..2d1663d0d6 100644
--- a/lib/hipe/x86/hipe_x86_encode.erl
+++ b/lib/hipe/x86/hipe_x86_encode.erl
@@ -65,6 +65,7 @@
 	 cc/1,
 	 % 8-bit registers
 	 %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0,
+	 reg_has_8bit/1,
 	 % 32-bit registers
 	 %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0,
 	 % operands
@@ -143,6 +144,8 @@ cc(g) -> ?CC_G.
 %% dh() -> ?DH.
 %% bh() -> ?BH.
 
+reg_has_8bit(Reg) -> Reg =< ?BL.
+
 %%% 32-bit registers
 
 -define(EAX, 2#000).
@@ -700,8 +703,16 @@ shd_op_sizeof(Opnds) ->
 
 test_encode(Opnds) ->
     case Opnds of
+	{al, {imm8,Imm8}} ->
+	    [16#A8, Imm8];
+	{ax, {imm16,Imm16}} ->
+	    [?PFX_OPND, 16#A9 | le16(Imm16, [])];
 	{eax, {imm32,Imm32}} ->
 	    [16#A9 | le32(Imm32, [])];
+	{{rm8,RM8}, {imm8,Imm8}} ->
+	    [16#F6 | encode_rm(RM8, 2#000, [Imm8])];
+	{{rm16,RM16}, {imm16,Imm16}} ->
+	    [?PFX_OPND, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))];
 	{{rm32,RM32}, {imm32,Imm32}} ->
 	    [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))];
 	{{rm32,RM32}, {reg32,Reg32}} ->
@@ -710,8 +721,16 @@ test_encode(Opnds) ->
 
 test_sizeof(Opnds) ->
     case Opnds of
+	{al, {imm8,_}} ->
+	    1 + 1;
+	{ax, {imm16,_}} ->
+	    2 + 2;
 	{eax, {imm32,_}} ->
 	    1 + 4;
+	{{rm8,RM8}, {imm8,_}} ->
+	    1 + sizeof_rm(RM8) + 1;
+	{{rm16,RM16}, {imm16,_}} ->
+	    2 + sizeof_rm(RM16) + 2;
 	{{rm32,RM32}, {imm32,_}} ->
 	    1 + sizeof_rm(RM32) + 4;
 	{{rm32,RM32}, {reg32,_}} ->
@@ -1283,7 +1302,11 @@ dotest1(OS) ->
     t(OS,'sub',{RM32,Imm8}),
     t(OS,'sub',{RM32,Reg32}),
     t(OS,'sub',{Reg32,RM32}),
+    t(OS,'test',{al,Imm8}),
+    t(OS,'test',{ax,Imm16}),
     t(OS,'test',{eax,Imm32}),
+    t(OS,'test',{RM8,Imm8}),
+    t(OS,'test',{RM16,Imm16}),
     t(OS,'test',{RM32,Imm32}),
     t(OS,'test',{RM32,Reg32}),
     t(OS,'xor',{eax,Imm32}),
diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl
index fc782571bf..17253ad46f 100644
--- a/lib/hipe/x86/hipe_x86_frame.erl
+++ b/lib/hipe/x86/hipe_x86_frame.erl
@@ -116,6 +116,8 @@ do_insn(I, LiveOut, Context, FPoff) ->
       {do_ret(I, Context, FPoff), context_framesize(Context)};
     #shift{} ->
       {[do_shift(I, Context, FPoff)], FPoff};
+    #test{} ->
+      {[do_test(I, Context, FPoff)], FPoff};
     _ ->	% comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare
       {[I], FPoff}
   end.
@@ -188,6 +190,12 @@ do_shift(I, Context, FPoff) ->
   Dst = conv_opnd(Dst0, FPoff, Context),
   I#shift{src=Src,dst=Dst}.
 
+do_test(I, Context, FPoff) ->
+  #test{src=Src0,dst=Dst0} = I,
+  Src = conv_opnd(Src0, FPoff, Context),
+  Dst = conv_opnd(Dst0, FPoff, Context),
+  I#test{src=Src,dst=Dst}.
+
 conv_opnd(Opnd, FPoff, Context) ->
   case opnd_is_pseudo(Opnd) of
     false ->
diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl
index ff26a31877..942201a051 100644
--- a/lib/hipe/x86/hipe_x86_pp.erl
+++ b/lib/hipe/x86/hipe_x86_pp.erl
@@ -188,6 +188,12 @@ pp_insn(Dev, I, Pre) ->
       io:format(Dev, ", ", []),
       pp_dst(Dev, Dst),
       io:format(Dev, "\n", []);
+    #test{src=Src, dst=Dst} ->
+      io:format(Dev, "\ttest ", []),
+      pp_src(Dev, Src),
+      io:format(Dev, ", ", []),
+      pp_dst(Dev, Dst),
+      io:format(Dev, "\n", []);
     #fp_binop{src=Src, dst=Dst, op=Op} ->
       io:format(Dev, "\t~s ", [Op]),
       pp_dst(Dev, Dst),
diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl
index edfd7b332c..1fd617570a 100644
--- a/lib/hipe/x86/hipe_x86_ra_finalise.erl
+++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl
@@ -162,6 +162,10 @@ ra_insn(I, Map, FpMap) ->
       Src = ra_opnd(Src0, Map),
       Dst = ra_opnd(Dst0, Map),
       I#shift{src=Src,dst=Dst};
+    #test{src=Src0,dst=Dst0} ->
+      Src = ra_opnd(Src0, Map),
+      Dst = ra_opnd(Dst0, Map),
+      I#test{src=Src,dst=Dst};
     _ ->
       exit({?MODULE,ra_insn,I})
   end.
diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl
index 35de692e07..9371e4b1a5 100644
--- a/lib/hipe/x86/hipe_x86_ra_naive.erl
+++ b/lib/hipe/x86/hipe_x86_ra_naive.erl
@@ -100,6 +100,8 @@ do_insn(I) ->	% Insn -> Insn list
       do_fp_binop(I);
     #shift{} ->
       do_shift(I);
+    #test{} ->
+      do_test(I);
     #label{} ->
       [I];
     #pseudo_jcc{} ->
@@ -310,6 +312,11 @@ do_shift(I) ->
       FixDst ++ [I#shift{dst=Dst}]
   end.
 
+do_test(I) ->
+  #test{src=Src0,dst=Dst0} = I,
+  {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0),
+  FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}].
+
 %%% Fix the operands of a binary op.
 %%% 1. remove pseudos from any explicit memory operands
 %%% 2. if both operands are (implicit or explicit) memory operands,
diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl
index f496b71828..e7c397b5b7 100644
--- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl
+++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl
@@ -83,6 +83,8 @@ do_insn(I, TempMap, Strategy) ->	% Insn -> {Insn list, DidSpill}
       do_fmove(I, TempMap, Strategy);
     #shift{} ->
       do_shift(I, TempMap, Strategy);
+    #test{} ->
+      do_test(I, TempMap, Strategy);
     _ ->
       %% comment, jmp*, label, pseudo_call, pseudo_jcc, pseudo_tailcall,
       %% pseudo_tailcall_prepare, push, ret
@@ -308,6 +310,14 @@ do_shift(I, TempMap, Strategy) ->
       {FixDst ++ [I#shift{dst=Dst}], DidSpill}
   end.
 
+%%% Fix a test op.
+
+do_test(I, TempMap, Strategy) ->
+  #test{src=Src0,dst=Dst0} = I,
+  {FixSrc, Src, FixDst, Dst, DidSpill} =
+    do_binary(Src0, Dst0, TempMap, Strategy),
+  {FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}], DidSpill}.
+
 %%% Fix the operands of a binary op.
 %%% 1. remove pseudos from any explicit memory operands
 %%% 2. if both operands are (implicit or explicit) memory operands,
-- 
cgit v1.2.3


From bd898fab5d86ff44ce3129db9a06a5c709719392 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Tue, 15 Nov 2016 13:37:19 +0100
Subject: hipe_x86: Fix&activate ElimCmp0 peephole rule

---
 lib/hipe/x86/hipe_x86_postpass.erl | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl
index 4515822a34..bd232041b6 100644
--- a/lib/hipe/x86/hipe_x86_postpass.erl
+++ b/lib/hipe/x86/hipe_x86_postpass.erl
@@ -120,19 +120,15 @@ peep([#move{src=Src1, dst=Dst},
 
 %% ElimCmp0
 %% --------
-peep([C=#cmp{src=Src, dst=Dst},J=#jcc{cc=Cond, label=Lab}|Insns],Res,Lst) ->
-    case (((Src =:= #x86_imm{value=0}) or (Dst =:= #x86_imm{value=0})) and
-	  ((Cond =:= 'eq') or (Cond =:= 'neq'))) of
-	true ->
-	    Src2 = case Src of #x86_imm{value=0} -> Src; _ -> Dst end, 
-	    Cond2 = case Cond of 'eq' -> 'z'; 'neq' -> 'nz' end,
-	    Test = #test{src=Src2, dst=#x86_imm{value=0}},
-	    Jump = #jcc{cc=Cond2, label=Lab},
-	    peep(Insns, [Jump, Test|Res], [elimCmp0|Lst]);
-	_ ->
-	    peep(Insns, [J,C|Res], Lst)
-    end;
-
+peep([#cmp{src=#x86_imm{value=0}, dst=Dst=#x86_temp{}}|Insns],Res,Lst) ->
+  %% TEST leaves the adjust flag undefined, whereas CMP sets it properly (in
+  %% this case to 0). However, since HiPE does not use any instructions that
+  %% read the adjust flag, we can do this transform safely.
+  peep(Insns, [#test{src=Dst, dst=Dst} | Res], [elimCmp0_1|Lst]);
+peep([#cmp{src=Src=#x86_temp{}, dst=#x86_imm{value=0}},
+      J=#jcc{cc=Cond}|Insns],Res,Lst)
+  when Cond =:= 'e'; Cond =:= 'ne' -> % We're commuting the comparison
+  peep(Insns, [J, #test{src=Src, dst=Src} | Res], [elimCmp0_2|Lst]);
 
 %% ElimCmpTest
 %% -----------
-- 
cgit v1.2.3


From ca0fbe891d0f8278b4824d6b2c5db4cd01fcef5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Wed, 9 Nov 2016 18:46:19 +0100
Subject: hipe_rtl: unify branch and alub

branch and alub overlap in their use cases, but the backends rely on
knowing that the result is unused in their lowering of branch. By
extending alub so that the destination is optional, it can fully replace
branch.

This simplifies rtl by reducing code duplication and the number of
instructions.

Also, in the x86 and arm backends, we can now use 'test' and
{'tst','cmn','teq'} to lower some alubs without destinations. This is
particularly good for x86, as sequences such as 'is_boxed' type tests
now get shorter (both from not needing a mov to copy the variable, and
from the fact that 'testb' encodes shorter than 'andq').
---
 lib/hipe/arm/hipe_rtl_to_arm.erl           | 109 ++++++++++++----------
 lib/hipe/llvm/hipe_rtl_to_llvm.erl         |  78 ++++++++--------
 lib/hipe/ppc/hipe_rtl_to_ppc.erl           |  89 +++++++++---------
 lib/hipe/rtl/hipe_rtl.erl                  |  77 ++++------------
 lib/hipe/rtl/hipe_rtl.hrl                  |   1 -
 lib/hipe/rtl/hipe_rtl_binary_construct.erl |   9 +-
 lib/hipe/rtl/hipe_rtl_binary_match.erl     |   5 +-
 lib/hipe/rtl/hipe_rtl_cfg.erl              |   7 +-
 lib/hipe/rtl/hipe_rtl_lcm.erl              |   1 -
 lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl   |  82 +++--------------
 lib/hipe/rtl/hipe_tagscheme.erl            |  30 +++----
 lib/hipe/sparc/hipe_rtl_to_sparc.erl       | 139 +++++------------------------
 lib/hipe/x86/hipe_rtl_to_x86.erl           |  70 +++++++++------
 lib/hipe/x86/hipe_x86.erl                  |   4 +-
 14 files changed, 269 insertions(+), 432 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/arm/hipe_rtl_to_arm.erl b/lib/hipe/arm/hipe_rtl_to_arm.erl
index 2f9181d517..c964c222aa 100644
--- a/lib/hipe/arm/hipe_rtl_to_arm.erl
+++ b/lib/hipe/arm/hipe_rtl_to_arm.erl
@@ -62,7 +62,6 @@ conv_insn(I, Map, Data) ->
   case I of
     #alu{} -> conv_alu(I, Map, Data);
     #alub{} -> conv_alub(I, Map, Data);
-    #branch{} -> conv_branch(I, Map, Data);
     #call{} -> conv_call(I, Map, Data);
     #comment{} -> conv_comment(I, Map, Data);
     #enter{} -> conv_enter(I, Map, Data);
@@ -111,6 +110,17 @@ commute_arithop(ArithOp) ->
     _ -> ArithOp
   end.
 
+conv_cmpop('add') -> 'cmn';
+conv_cmpop('sub') -> 'cmp';
+conv_cmpop('and') -> 'tst';
+conv_cmpop('xor') -> 'teq';
+conv_cmpop(_) -> none.
+
+cmpop_commutes('cmp') -> false;
+cmpop_commutes('cmn') -> true;
+cmpop_commutes('tst') -> true;
+cmpop_commutes('teq') -> true.
+
 mk_alu(S, Dst, Src1, RtlAluOp, Src2) ->
   case hipe_rtl:is_shift_op(RtlAluOp) of
     true ->
@@ -223,71 +233,77 @@ fix_aluop_imm(AluOp, Imm) -> % {FixAm1,NewAluOp,Am1}
 
 conv_alub(I, Map, Data) ->
   %% dst = src1 aluop src2; if COND goto label
-  {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
-  {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
-  {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
+  {Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map),
+  {Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0),
   RtlAluOp = hipe_rtl:alub_op(I),
-  Cond0 = conv_alub_cond(RtlAluOp, hipe_rtl:alub_cond(I)),
-  Cond =
-    case {RtlAluOp,Cond0} of
-      {'mul','vs'} -> 'ne';	% overflow becomes not-equal
-      {'mul','vc'} -> 'eq';	% no-overflow becomes equal
-      {'mul',_} -> exit({?MODULE,I});
-      {_,_} -> Cond0
-    end,
-  I2 = mk_pseudo_bc(
-	  Cond,
-	  hipe_rtl:alub_true_label(I),
-	  hipe_rtl:alub_false_label(I),
-	  hipe_rtl:alub_pred(I)),
-  S = true,
-  I1 = mk_alu(S, Dst, Src1, RtlAluOp, Src2),
-  {I1 ++ I2, Map2, Data}.
-
-conv_branch(I, Map, Data) ->
-  %% <unused> = src1 - src2; if COND goto label
-  {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
-  {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
-  Cond = conv_branch_cond(hipe_rtl:branch_cond(I)),
-  I2 = mk_branch(Src1, Cond, Src2,
-		 hipe_rtl:branch_true_label(I),
-		 hipe_rtl:branch_false_label(I),
-		 hipe_rtl:branch_pred(I)),
-  {I2, Map1, Data}.
+  RtlCond = hipe_rtl:alub_cond(I),
+  HasDst = hipe_rtl:alub_has_dst(I),
+  CmpOp = conv_cmpop(RtlAluOp),
+  Cond0 = conv_alub_cond(RtlAluOp, RtlCond),
+  case (not HasDst) andalso CmpOp =/= none of
+    true ->
+      I1 = mk_branch(Src1, CmpOp, Src2, Cond0,
+		     hipe_rtl:alub_true_label(I),
+		     hipe_rtl:alub_false_label(I),
+		     hipe_rtl:alub_pred(I)),
+      {I1, Map1, Data};
+    false ->
+      {Dst, Map2} =
+	case HasDst of
+	  false -> {new_untagged_temp(), Map1};
+	  true -> conv_dst(hipe_rtl:alub_dst(I), Map1)
+	end,
+      Cond =
+	case {RtlAluOp,Cond0} of
+	  {'mul','vs'} -> 'ne';	% overflow becomes not-equal
+	  {'mul','vc'} -> 'eq';	% no-overflow becomes equal
+	  {'mul',_} -> exit({?MODULE,I});
+	  {_,_} -> Cond0
+	end,
+      I2 = mk_pseudo_bc(
+	     Cond,
+	     hipe_rtl:alub_true_label(I),
+	     hipe_rtl:alub_false_label(I),
+	     hipe_rtl:alub_pred(I)),
+      S = true,
+      I1 = mk_alu(S, Dst, Src1, RtlAluOp, Src2),
+      {I1 ++ I2, Map2, Data}
+  end.
 
-mk_branch(Src1, Cond, Src2, TrueLab, FalseLab, Pred) ->
+mk_branch(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred) ->
   case hipe_arm:is_temp(Src1) of
     true ->
       case hipe_arm:is_temp(Src2) of
 	true ->
-	  mk_branch_rr(Src1, Src2, Cond, TrueLab, FalseLab, Pred);
+	  mk_branch_rr(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred);
 	_ ->
-	  mk_branch_ri(Src1, Cond, Src2, TrueLab, FalseLab, Pred)
+	  mk_branch_ri(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred)
       end;
     _ ->
       case hipe_arm:is_temp(Src2) of
 	true ->
-	  NewCond = commute_cond(Cond),
-	  mk_branch_ri(Src2, NewCond, Src1, TrueLab, FalseLab, Pred);
+	  NewCond =
+	    case cmpop_commutes(CmpOp) of
+	      true -> Cond;
+	      false ->  commute_cond(Cond)
+	    end,
+	  mk_branch_ri(Src2, CmpOp, Src1, NewCond, TrueLab, FalseLab, Pred);
 	_ ->
-	  mk_branch_ii(Src1, Cond, Src2, TrueLab, FalseLab, Pred)
+	  mk_branch_ii(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred)
       end
   end.
 
-mk_branch_ii(Imm1, Cond, Imm2, TrueLab, FalseLab, Pred) ->
+mk_branch_ii(Imm1, CmpOp, Imm2, Cond, TrueLab, FalseLab, Pred) ->
   Tmp = new_untagged_temp(),
   mk_li(Tmp, Imm1,
-	mk_branch_ri(Tmp, Cond, Imm2,
+	mk_branch_ri(Tmp, CmpOp, Imm2, Cond,
 		     TrueLab, FalseLab, Pred)).
 
-mk_branch_ri(Src, Cond, Imm, TrueLab, FalseLab, Pred) ->
-  {FixAm1,NewCmpOp,Am1} = fix_aluop_imm('cmp', Imm),
-  FixAm1 ++ mk_cmp_bc(NewCmpOp, Src, Am1, Cond, TrueLab, FalseLab, Pred).
-
-mk_branch_rr(Src1, Src2, Cond, TrueLab, FalseLab, Pred) ->
-  mk_cmp_bc('cmp', Src1, Src2, Cond, TrueLab, FalseLab, Pred).
+mk_branch_ri(Src, CmpOp, Imm, Cond, TrueLab, FalseLab, Pred) ->
+  {FixAm1,NewCmpOp,Am1} = fix_aluop_imm(CmpOp, Imm),
+  FixAm1 ++ mk_branch_rr(Src, NewCmpOp, Am1, Cond, TrueLab, FalseLab, Pred).
 
-mk_cmp_bc(CmpOp, Src, Am1, Cond, TrueLab, FalseLab, Pred) ->
+mk_branch_rr(Src, CmpOp, Am1, Cond, TrueLab, FalseLab, Pred) ->
   [hipe_arm:mk_cmp(CmpOp, Src, Am1) |
    mk_pseudo_bc(Cond, TrueLab, FalseLab, Pred)].
 
@@ -637,6 +653,7 @@ conv_alub_cond(RtlAluOp, Cond) ->	% may be unsigned, depends on aluop
   case {RtlAluOp, Cond} of	% handle allowed alub unsigned conditions
     {'add', 'ltu'} -> 'hs';	% add+ltu == unsigned overflow == carry set == hs
     %% add more cases when needed
+    {'sub', _} -> conv_branch_cond(Cond);
     _ -> conv_cond(Cond)
   end.
 
diff --git a/lib/hipe/llvm/hipe_rtl_to_llvm.erl b/lib/hipe/llvm/hipe_rtl_to_llvm.erl
index 20813f8bd7..f8911c1909 100644
--- a/lib/hipe/llvm/hipe_rtl_to_llvm.erl
+++ b/lib/hipe/llvm/hipe_rtl_to_llvm.erl
@@ -156,9 +156,6 @@ translate_instr(I, Relocs, Data) ->
     #alub{} ->
       {I2, Relocs2} = trans_alub(I, Relocs),
       {I2, Relocs2, Data};
-    #branch{} ->
-      {I2, Relocs2} = trans_branch(I, Relocs),
-      {I2, Relocs2, Data};
     #call{} ->
       {I2, Relocs2} =
         case hipe_rtl:call_fun(I) of
@@ -255,7 +252,6 @@ trans_alub(I, Relocs) ->
 trans_alub_overflow(I, Sign, Relocs) ->
   {Src1, I1} = trans_src(hipe_rtl:alub_src1(I)),
   {Src2, I2} = trans_src(hipe_rtl:alub_src2(I)),
-  RtlDst = hipe_rtl:alub_dst(I),
   TmpDst = mk_temp(),
   Name = trans_alub_op(I, Sign),
   NewRelocs = relocs_store(Name, {call, remote, {llvm, Name, 2}}, Relocs),
@@ -266,7 +262,10 @@ trans_alub_overflow(I, Sign, Relocs) ->
 			                   [{WordTy, Src1}, {WordTy, Src2}], []),
   %% T1{0}: result of the operation
   I4 = hipe_llvm:mk_extractvalue(TmpDst, ReturnType, T1 , "0", []),
-  I5 = store_stack_dst(TmpDst, RtlDst),
+  I5 = case hipe_rtl:alub_has_dst(I) of
+	 false -> [];
+	 true -> store_stack_dst(TmpDst, hipe_rtl:alub_dst(I))
+       end,
   T2 = mk_temp(),
   %% T1{1}: Boolean variable indicating overflow
   I6 = hipe_llvm:mk_extractvalue(T2, ReturnType, T1, "1", []),
@@ -311,42 +310,35 @@ trans_alub_op(I, Sign) ->
   Name ++ Type.
 
 trans_alub_no_overflow(I, Relocs) ->
+  {Src1, I1} = trans_src(hipe_rtl:alub_src1(I)),
+  {Src2, I2} = trans_src(hipe_rtl:alub_src2(I)),
+  WordTy = hipe_llvm:mk_int(?BITS_IN_WORD),
   %% alu
-  T = hipe_rtl:mk_alu(hipe_rtl:alub_dst(I), hipe_rtl:alub_src1(I),
-                      hipe_rtl:alub_op(I), hipe_rtl:alub_src2(I)),
-  %% A trans_alu instruction cannot change relocations
-  {I1, _} = trans_alu(T, Relocs),
+  {CmpLhs, CmpRhs, I5, Cond} =
+    case {hipe_rtl:alub_has_dst(I), hipe_rtl:alub_op(I)} of
+      {false, 'sub'} ->
+	Cond0 = trans_branch_rel_op(hipe_rtl:alub_cond(I)),
+	{Src1, Src2, [], Cond0};
+      {HasDst, AlubOp} ->
+	TmpDst = mk_temp(),
+	Op = trans_op(AlubOp),
+	I3 = hipe_llvm:mk_operation(TmpDst, Op, WordTy, Src1, Src2, []),
+	I4 = case HasDst of
+	       false -> [];
+	       true -> store_stack_dst(TmpDst, hipe_rtl:alub_dst(I))
+	     end,
+	Cond0 = trans_alub_rel_op(hipe_rtl:alub_cond(I)),
+	{TmpDst, "0", [I4, I3], Cond0}
+    end,
   %% icmp
-  %% Translate destination as src, to match with the semantics of instruction
-  {Dst, I2} = trans_src(hipe_rtl:alub_dst(I)),
-  Cond = trans_rel_op(hipe_rtl:alub_cond(I)),
   T3 = mk_temp(),
-  WordTy = hipe_llvm:mk_int(?BITS_IN_WORD),
-  I5 = hipe_llvm:mk_icmp(T3, Cond, WordTy, Dst, "0"),
+  I6 = hipe_llvm:mk_icmp(T3, Cond, WordTy, CmpLhs, CmpRhs),
   %% br
   Metadata = branch_metadata(hipe_rtl:alub_pred(I)),
   True_label = mk_jump_label(hipe_rtl:alub_true_label(I)),
   False_label = mk_jump_label(hipe_rtl:alub_false_label(I)),
-  I6 = hipe_llvm:mk_br_cond(T3, True_label, False_label, Metadata),
-  {[I6, I5, I2, I1], Relocs}.
-
-%%
-%% branch
-%%
-trans_branch(I, Relocs) ->
-  {Src1, I1} = trans_src(hipe_rtl:branch_src1(I)),
-  {Src2, I2} = trans_src(hipe_rtl:branch_src2(I)),
-  Cond = trans_rel_op(hipe_rtl:branch_cond(I)),
-  %% icmp
-  T1 = mk_temp(),
-  WordTy = hipe_llvm:mk_int(?BITS_IN_WORD),
-  I3 = hipe_llvm:mk_icmp(T1, Cond, WordTy, Src1, Src2),
-  %% br
-  True_label = mk_jump_label(hipe_rtl:branch_true_label(I)),
-  False_label = mk_jump_label(hipe_rtl:branch_false_label(I)),
-  Metadata = branch_metadata(hipe_rtl:branch_pred(I)),
-  I4 = hipe_llvm:mk_br_cond(T1, True_label, False_label, Metadata),
-  {[I4, I3, I2, I1], Relocs}.
+  I7 = hipe_llvm:mk_br_cond(T3, True_label, False_label, Metadata),
+  {[I7, I6, I5, I2, I1], Relocs}.
 
 branch_metadata(X) when X =:= 0.5 -> [];
 branch_metadata(X) when X > 0.5 -> ?BRANCH_META_TAKEN;
@@ -1162,7 +1154,7 @@ trans_dst(A) ->
 		       true ->
 			 "%DL" ++ integer_to_list(hipe_rtl:const_label_label(A)) ++ "_var";
 		       false ->
-			 exit({?MODULE, trans_dst, {"Bad RTL argument",A}})
+			 error(badarg, [A])
 		     end
 		 end
 	     end,
@@ -1260,14 +1252,19 @@ trans_op(Op) ->
     Other -> exit({?MODULE, trans_op, {"Unknown RTL operator", Other}})
   end.
 
-trans_rel_op(Op) ->
+trans_branch_rel_op(Op) ->
   case Op of
-    eq -> eq;
-    ne -> ne;
     gtu -> ugt;
     geu -> uge;
     ltu -> ult;
     leu -> ule;
+    _ -> trans_alub_rel_op(Op)
+  end.
+
+trans_alub_rel_op(Op) ->
+  case Op of
+    eq -> eq;
+    ne -> ne;
     gt -> sgt;
     ge -> sge;
     lt -> slt;
@@ -1300,7 +1297,10 @@ insn_dst(I) ->
     #alu{} ->
       [hipe_rtl:alu_dst(I)];
     #alub{} ->
-      [hipe_rtl:alub_dst(I)];
+      case hipe_rtl:alub_has_dst(I) of
+	true -> [hipe_rtl:alub_dst(I)];
+	false -> []
+      end;
     #call{} ->
       case hipe_rtl:call_dstlist(I) of
         [] -> [];
diff --git a/lib/hipe/ppc/hipe_rtl_to_ppc.erl b/lib/hipe/ppc/hipe_rtl_to_ppc.erl
index a01e67a789..09f1ce5a49 100644
--- a/lib/hipe/ppc/hipe_rtl_to_ppc.erl
+++ b/lib/hipe/ppc/hipe_rtl_to_ppc.erl
@@ -80,7 +80,6 @@ conv_insn(I, Map, Data) ->
   case I of
     #alu{} -> conv_alu(I, Map, Data);
     #alub{} -> conv_alub(I, Map, Data);
-    #branch{} -> conv_branch(I, Map, Data);
     #call{} -> conv_call(I, Map, Data);
     #comment{} -> conv_comment(I, Map, Data);
     #enter{} -> conv_enter(I, Map, Data);
@@ -441,36 +440,53 @@ mk_alu_rr(Dst, Src1, RtlAluOp, Src2) ->
 
 conv_alub(I, Map, Data) ->
   %% dst = src1 aluop src2; if COND goto label
-  {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
-  {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
-  {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
-  {AluOp, BCond} =
-    case {hipe_rtl:alub_op(I), hipe_rtl:alub_cond(I)} of
-      {'add', 'ltu'} ->
-	{'addc', 'eq'};
-      {RtlAlubOp, RtlAlubCond} ->
-	{conv_alub_op(RtlAlubOp), conv_alub_cond(RtlAlubCond)}
-    end,
-  BC = mk_pseudo_bc(BCond,
-		    hipe_rtl:alub_true_label(I),
-		    hipe_rtl:alub_false_label(I),
-		    hipe_rtl:alub_pred(I)),
-  I2 =
-    case {AluOp, BCond} of
-      {'addc', 'eq'} ->	% copy XER[CA] to CR0[EQ] before the BC
-	TmpR = new_untagged_temp(),
-	[hipe_ppc:mk_mfspr(TmpR, 'xer'),
-	 hipe_ppc:mk_mtcr(TmpR) |
-	 BC];
-      _ -> BC
-    end,
-  {NewSrc1, NewSrc2} =
-    case AluOp of
-      'subf' -> {Src2, Src1};
-      _ -> {Src1, Src2}
-    end,
-  I1 = mk_alub(Dst, NewSrc1, AluOp, NewSrc2, BCond),
-  {I1 ++ I2, Map2, Data}.
+  HasDst = hipe_rtl:alub_has_dst(I),
+  {Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map),
+  {Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0),
+  RtlAlubOp = hipe_rtl:alub_op(I),
+  RtlAlubCond = hipe_rtl:alub_cond(I),
+  case {HasDst, RtlAlubOp} of
+    {false, sub} ->
+      {BCond,Sign} = conv_branch_cond(RtlAlubCond),
+      I2 = mk_branch(Src1, BCond, Sign, Src2,
+		     hipe_rtl:alub_true_label(I),
+		     hipe_rtl:alub_false_label(I),
+		     hipe_rtl:alub_pred(I)),
+      {I2, Map1, Data};
+    _ ->
+      {Dst, Map2} =
+	case HasDst of
+	  false -> {new_untagged_temp(), Map1};
+	  true -> conv_dst(hipe_rtl:alub_dst(I), Map1)
+	end,
+      {AluOp, BCond} =
+	case {RtlAlubOp, RtlAlubCond} of
+	  {'add', 'ltu'} ->
+	    {'addc', 'eq'};
+	  {_, _} ->
+	    {conv_alub_op(RtlAlubOp), conv_alub_cond(RtlAlubCond)}
+	end,
+      BC = mk_pseudo_bc(BCond,
+			hipe_rtl:alub_true_label(I),
+			hipe_rtl:alub_false_label(I),
+			hipe_rtl:alub_pred(I)),
+      I2 =
+	case {AluOp, BCond} of
+	  {'addc', 'eq'} ->	% copy XER[CA] to CR0[EQ] before the BC
+	    TmpR = new_untagged_temp(),
+	    [hipe_ppc:mk_mfspr(TmpR, 'xer'),
+	     hipe_ppc:mk_mtcr(TmpR) |
+	     BC];
+	  _ -> BC
+	end,
+      {NewSrc1, NewSrc2} =
+	case AluOp of
+	  'subf' -> {Src2, Src1};
+	  _ -> {Src1, Src2}
+	end,
+      I1 = mk_alub(Dst, NewSrc1, AluOp, NewSrc2, BCond),
+      {I1 ++ I2, Map2, Data}
+  end.
 
 conv_alub_op(RtlAluOp) ->
   case {get(hipe_target_arch), RtlAluOp} of
@@ -689,17 +705,6 @@ mk_alub_rr_Rc(Dst, Src1, AluOp, Src2) ->
     end,
   [hipe_ppc:mk_alu(AluOpDot, Dst, Src1, Src2)].
 
-conv_branch(I, Map, Data) ->
-  %% <unused> = src1 - src2; if COND goto label
-  {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
-  {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
-  {BCond,Sign} = conv_branch_cond(hipe_rtl:branch_cond(I)),
-  I2 = mk_branch(Src1, BCond, Sign, Src2,
-		 hipe_rtl:branch_true_label(I),
-		 hipe_rtl:branch_false_label(I),
-		 hipe_rtl:branch_pred(I)),
-  {I2, Map1, Data}.
-
 conv_branch_cond(Cond) -> % may be unsigned
   case Cond of
     gtu -> {'gt', 'unsigned'};
diff --git a/lib/hipe/rtl/hipe_rtl.erl b/lib/hipe/rtl/hipe_rtl.erl
index 0726827299..e4e857ec43 100644
--- a/lib/hipe/rtl/hipe_rtl.erl
+++ b/lib/hipe/rtl/hipe_rtl.erl
@@ -187,18 +187,14 @@
 
 	 mk_branch/5,
 	 mk_branch/6,
-	 branch_src1/1,
-	 branch_src2/1,
-	 branch_cond/1,
-	 branch_true_label/1,
-	 branch_false_label/1,
-	 branch_pred/1,
+	 mk_branch/7,
 	 %% is_branch/1,
 	 %% branch_true_label_update/2,
 	 %% branch_false_label_update/2,
 
 	 mk_alub/7,
 	 mk_alub/8,
+	 alub_has_dst/1,
 	 alub_dst/1,
 	 alub_src1/1,
 	 alub_op/1,
@@ -587,26 +583,6 @@ label_name(#label{name=Name}) -> Name.
 is_label(#label{}) -> true;
 is_label(_) -> false.
 
-%%
-%% branch
-%%
-
-mk_branch(Src1, Op, Src2, True, False) ->
-  mk_branch(Src1, Op, Src2, True, False, 0.5).
-mk_branch(Src1, Op, Src2, True, False, P) ->
-  #branch{src1=Src1, 'cond'=Op, src2=Src2, true_label=True,
-	  false_label=False, p=P}.
-branch_src1(#branch{src1=Src1}) -> Src1.
-branch_src1_update(Br, NewSrc) -> Br#branch{src1=NewSrc}.
-branch_src2(#branch{src2=Src2}) -> Src2.
-branch_src2_update(Br, NewSrc) -> Br#branch{src2=NewSrc}.
-branch_cond(#branch{'cond'=Cond}) -> Cond.
-branch_true_label(#branch{true_label=TrueLbl}) -> TrueLbl.
-branch_true_label_update(Br, NewTrue) -> Br#branch{true_label=NewTrue}.
-branch_false_label(#branch{false_label=FalseLbl}) -> FalseLbl.
-branch_false_label_update(Br, NewFalse) -> Br#branch{false_label=NewFalse}.
-branch_pred(#branch{p=P}) -> P.
-
 %%
 %% alub
 %%
@@ -614,11 +590,19 @@ branch_pred(#branch{p=P}) -> P.
 -type alub_cond() :: 'eq' | 'ne' | 'ge' | 'geu' | 'gt' | 'gtu' | 'le'
                    | 'leu' | 'lt' | 'ltu' | 'overflow' | 'not_overflow'.
 
+mk_branch(Src1, Cond, Src2, True, False) -> % P defaults to 0.5
+  mk_branch(Src1, Cond, Src2, True, False, 0.5).
+mk_branch(Src1, Cond, Src2, True, False, P) -> % operator defaults to 'sub'
+  mk_branch(Src1, 'sub', Src2, Cond, True, False, P).
+mk_branch(Src1, Op, Src2, Cond, True, False, P) -> % a branch is an #alub{} whose dst is []
+  mk_alub([], Src1, Op, Src2, Cond, True, False, P).
+
 mk_alub(Dst, Src1, Op, Src2, Cond, True, False) ->
   mk_alub(Dst, Src1, Op, Src2, Cond, True, False, 0.5).
 mk_alub(Dst, Src1, Op, Src2, Cond, True, False, P) ->
   #alub{dst=Dst, src1=Src1, op=Op, src2=Src2, 'cond'=Cond,
 	true_label=True, false_label=False, p=P}.
+alub_has_dst(#alub{dst=Dst}) -> Dst =/= []. % false exactly when dst is [], as stored by mk_branch/7
 alub_dst(#alub{dst=Dst}) -> Dst.
 alub_dst_update(A, NewDst) -> A#alub{dst=NewDst}.
 alub_src1(#alub{src1=Src1}) -> Src1.
@@ -943,8 +927,7 @@ args(I) ->
   case I of
     #alu{} -> [alu_src1(I), alu_src2(I)];
     #alub{} -> [alub_src1(I), alub_src2(I)];
-    #branch{} -> [branch_src1(I), branch_src2(I)];
-    #call{} -> 
+    #call{} ->
       Args = call_arglist(I) ++ hipe_rtl_arch:call_used(),
       case call_is_known(I) of
 	false -> [call_fun(I) | Args];
@@ -987,8 +970,8 @@ args(I) ->
 defines(Instr) ->
   Defs = case Instr of
 	   #alu{} -> [alu_dst(Instr)];
+	   #alub{dst=[]} -> [];
 	   #alub{} -> [alub_dst(Instr)];
-	   #branch{} -> [];
 	   #call{} -> call_dstlist(Instr) ++ hipe_rtl_arch:call_defined();
 	   #comment{} -> [];
 	   #enter{} -> [];
@@ -1042,9 +1025,6 @@ subst_uses(Subst, I) ->
     #alub{} ->
       I0 = alub_src1_update(I, subst1(Subst, alub_src1(I))),
       alub_src2_update(I0, subst1(Subst, alub_src2(I)));
-    #branch{} ->
-      I0 = branch_src1_update(I, subst1(Subst, branch_src1(I))),
-      branch_src2_update(I0, subst1(Subst, branch_src2(I)));
     #call{} ->
       case call_is_known(I) of
 	false ->
@@ -1126,11 +1106,6 @@ subst_uses_llvm(Subst, I) ->
       {NewSrc1, _ } = subst1_llvm(Subst1, alub_src1(I)),
       I0 =  alub_src1_update(I, NewSrc1),
       alub_src2_update(I0, NewSrc2);
-    #branch{} ->
-      {NewSrc2, Subst1} = subst1_llvm(Subst, branch_src2(I)),
-      {NewSrc1, _ } = subst1_llvm(Subst1, branch_src1(I)),
-      I0 = branch_src1_update(I, NewSrc1),
-      branch_src2_update(I0, NewSrc2);
     #call{} ->
       case call_is_known(I) of
         false ->
@@ -1243,10 +1218,10 @@ subst_defines(Subst, I)->
   case I of
     #alu{} ->
       alu_dst_update(I, subst1(Subst, alu_dst(I)));
+    #alub{dst=[]} ->
+      I;
     #alub{} ->
       alub_dst_update(I, subst1(Subst, alub_dst(I)));
-    #branch{} ->
-      I;
     #call{} ->
       call_dstlist_update(I, subst_list(Subst, call_dstlist(I)));
     #comment{} ->
@@ -1313,7 +1288,6 @@ is_safe(Instr) ->
   case Instr of
     #alu{} -> true;
     #alub{} -> false;
-    #branch{} -> false;
     #call{} -> false;
     #comment{} -> false;
     #enter{} -> false;
@@ -1386,17 +1360,6 @@ redirect_jmp(Jmp, ToOld, ToNew) ->
   %% OBS: In a jmp instruction more than one labels may be identical
   %%      and thus need redirection!
   case Jmp of
-    #branch{} ->
-      TmpJmp = case branch_true_label(Jmp) of
-		 ToOld -> branch_true_label_update(Jmp, ToNew);
-		 _ -> Jmp
-	       end,
-      case branch_false_label(TmpJmp) of
-	ToOld ->
-	  branch_false_label_update(TmpJmp, ToNew);
-	_ ->
-	  TmpJmp
-      end;
     #switch{} ->
       NewLbls = [case Lbl =:= ToOld of
 		   true -> ToNew;
@@ -1591,13 +1554,6 @@ pp_instr(Dev, I) ->
       io:format(Dev, "~n", []);
     #label{} ->
       io:format(Dev, "L~w:~n", [label_name(I)]);
-    #branch{} ->
-      io:format(Dev, "    if (", []),
-      pp_arg(Dev, branch_src1(I)),
-      io:format(Dev, " ~w ", [branch_cond(I)]),
-      pp_arg(Dev, branch_src2(I)),
-      io:format(Dev, ") then L~w (~.2f) else L~w~n", 
-		[branch_true_label(I), branch_pred(I), branch_false_label(I)]);
     #switch{} ->
       io:format(Dev, "    switch (", []),
       pp_arg(Dev, switch_src(I)),
@@ -1606,7 +1562,10 @@ pp_instr(Dev, I) ->
       io:format(Dev, ">\n", []);
     #alub{} ->
       io:format(Dev, "    ", []),
-      pp_arg(Dev, alub_dst(I)),
+      case alub_has_dst(I) of
+	true -> pp_arg(Dev, alub_dst(I));
+	false -> io:format(Dev, "_", [])
+      end,
       io:format(Dev, " <- ", []),
       pp_arg(Dev, alub_src1(I)),
       io:format(Dev, " ~w ", [alub_op(I)]),
diff --git a/lib/hipe/rtl/hipe_rtl.hrl b/lib/hipe/rtl/hipe_rtl.hrl
index cc76e7e5c4..74020c6045 100644
--- a/lib/hipe/rtl/hipe_rtl.hrl
+++ b/lib/hipe/rtl/hipe_rtl.hrl
@@ -28,7 +28,6 @@
 
 -record(alu, {dst, src1, op, src2}).
 -record(alub, {dst, src1, op, src2, 'cond', true_label, false_label, p}).
--record(branch, {src1, src2, 'cond', true_label, false_label, p}).
 -record(call, {dstlist, 'fun', arglist, type, continuation,
     failcontinuation, normalcontinuation = []}).
 -record(comment, {text}).
diff --git a/lib/hipe/rtl/hipe_rtl_binary_construct.erl b/lib/hipe/rtl/hipe_rtl_binary_construct.erl
index 367d76b24d..2922972085 100644
--- a/lib/hipe/rtl/hipe_rtl_binary_construct.erl
+++ b/lib/hipe/rtl/hipe_rtl_binary_construct.erl
@@ -757,9 +757,9 @@ test_alignment(SrcOffset, NumBits, Offset, AlignedCode, CCode) ->
   [AlignedLbl, CLbl] = create_lbls(2),
    [hipe_rtl:mk_alu(Tmp, SrcOffset, 'or', NumBits),
    hipe_rtl:mk_alu(Tmp, Tmp, 'or', Offset),
-   hipe_rtl:mk_alub(Tmp, Tmp, 'and', ?LOW_BITS, 'eq',
-		    hipe_rtl:label_name(AlignedLbl),
-		    hipe_rtl:label_name(CLbl)),
+   hipe_rtl:mk_branch(Tmp, 'and', ?LOW_BITS, 'eq',
+		      hipe_rtl:label_name(AlignedLbl),
+		      hipe_rtl:label_name(CLbl), 0.5),
    AlignedLbl,
    AlignedCode,
    CLbl,
@@ -1284,8 +1284,7 @@ is_divisible(Dividend, Divisor, SuccLbl, FailLbl) ->
     true -> %% Divisor is a power of 2
       %% Test that the Log2-1 lowest bits are clear
       Mask = hipe_rtl:mk_imm(Divisor - 1),
-      [Tmp] = create_regs(1),
-      [hipe_rtl:mk_alub(Tmp, Dividend, 'and', Mask, eq, SuccLbl, FailLbl, 0.99)];
+      [hipe_rtl:mk_branch(Dividend, 'and', Mask, eq, SuccLbl, FailLbl, 0.99)];
     false ->
       %% We need division, fall back to a primop
       [hipe_rtl:mk_call([], is_divisible, [Dividend, hipe_rtl:mk_imm(Divisor)],
diff --git a/lib/hipe/rtl/hipe_rtl_binary_match.erl b/lib/hipe/rtl/hipe_rtl_binary_match.erl
index d999cd2743..520b055ba7 100644
--- a/lib/hipe/rtl/hipe_rtl_binary_match.erl
+++ b/lib/hipe/rtl/hipe_rtl_binary_match.erl
@@ -659,9 +659,8 @@ test_alignment_code(Size, Unit, SLblName, FalseLblName) ->
   end.
 
 get_fast_test_code(Size, AndTest, SLblName, FalseLblName) ->
-  [Tmp] = create_gcsafe_regs(1),
-  [hipe_rtl:mk_alub(Tmp, Size, 'and', hipe_rtl:mk_imm(AndTest),
-		    'eq', SLblName, FalseLblName)].
+  [hipe_rtl:mk_branch(Size, 'and', hipe_rtl:mk_imm(AndTest), 'eq',
+		      SLblName, FalseLblName, 0.5)].
 
 %% This is really slow
 get_slow_test_code(Size, Unit, SLblName, FalseLblName) ->
diff --git a/lib/hipe/rtl/hipe_rtl_cfg.erl b/lib/hipe/rtl/hipe_rtl_cfg.erl
index f49e8f815f..e802b320c2 100644
--- a/lib/hipe/rtl/hipe_rtl_cfg.erl
+++ b/lib/hipe/rtl/hipe_rtl_cfg.erl
@@ -83,9 +83,7 @@ mk_goto(Name) ->
 
 branch_successors(Instr) ->
   case Instr of
-    #branch{} -> [hipe_rtl:branch_true_label(Instr), 
-		  hipe_rtl:branch_false_label(Instr)];
-    #alub{} -> [hipe_rtl:alub_true_label(Instr), 
+    #alub{} -> [hipe_rtl:alub_true_label(Instr),
 	        hipe_rtl:alub_false_label(Instr)];
     #switch{} -> hipe_rtl:switch_labels(Instr);
     #call{} -> 
@@ -106,7 +104,6 @@ fails_to(Instr) ->
 
 is_branch(Instr) ->
    case Instr of
-     #branch{} -> true;
      #alub{} -> true;
      #switch{} -> true;
      #goto{} -> true;
@@ -127,7 +124,7 @@ is_branch(Instr) ->
 
 is_pure_branch(Instr) ->
   case Instr of
-    #branch{} -> true;
+    #alub{} -> not hipe_rtl:alub_has_dst(Instr);
     #switch{} -> true;
     #goto{} -> true;
     _ -> false
diff --git a/lib/hipe/rtl/hipe_rtl_lcm.erl b/lib/hipe/rtl/hipe_rtl_lcm.erl
index 71bd06c0df..67ddd0f649 100644
--- a/lib/hipe/rtl/hipe_rtl_lcm.erl
+++ b/lib/hipe/rtl/hipe_rtl_lcm.erl
@@ -378,7 +378,6 @@ is_expr(I) ->
 %% 	  end;
 	       
         #alub{} -> false; %% TODO: Split instruction to consider alu expression?
-        #branch{} -> false;
         #call{} -> false; %% We cannot prove that a call has no side-effects
         #comment{} -> false;
         #enter{} -> false;
diff --git a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl
index 7158383010..f887eeab66 100644
--- a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl
+++ b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl
@@ -110,8 +110,6 @@ visit_expression(Instruction, Environment) ->
       visit_alu(Instruction, Environment);
     #alub{} ->
       visit_alub(Instruction, Environment);
-    #branch{} ->
-      visit_branch(Instruction, Environment);
     #call{} ->
       visit_call(Instruction, Environment);
 %%    #comment{} ->
@@ -183,42 +181,6 @@ set_to(Dst, Val, Env) ->
   {Env1, SSAWork} = update_lattice_value({Dst, Val}, Env),
   {[], SSAWork, Env1}.
 
-%%-----------------------------------------------------------------------------
-%% Procedure : visit_branch/2
-%% Purpose   : do symbolic exection of branch instructions.
-%% Arguments : Inst - The instruction
-%%             Env  - The environment
-%% Returns   : { FlowWorkList, SSAWorkList, NewEnvironment}
-%%-----------------------------------------------------------------------------
-
-visit_branch(Inst, Env) -> %% Titta också på exekverbarflagga
-  Val1 = lookup_lattice_value(hipe_rtl:branch_src1(Inst), Env),
-  Val2 = lookup_lattice_value(hipe_rtl:branch_src2(Inst), Env),
-  CFGWL = case evaluate_relop(Val1, hipe_rtl:branch_cond(Inst), Val2) of
-            true   -> [hipe_rtl:branch_true_label(Inst)];
-            false  -> [hipe_rtl:branch_false_label(Inst)];
-            bottom -> [hipe_rtl:branch_true_label(Inst), 
-	               hipe_rtl:branch_false_label(Inst)];
-            top    -> []
-          end,
-  {CFGWL, [], Env}.
-
-%%-----------------------------------------------------------------------------
-%% Procedure : evaluate_relop/3
-%% Purpose   : evaluate the given relop. While taking care to handle top & 
-%%             bottom in some sane way.
-%% Arguments : Val1, Val2 - The operands Integers or top or bottom
-%%             RelOp  - some relop atom from rtl. 
-%% Returns   : bottom, top, true or false
-%%-----------------------------------------------------------------------------
-
-evaluate_relop(Val1, RelOp, Val2) ->
-  if 
-    (Val1==bottom) or (Val2==bottom) -> bottom ;
-    (Val1==top) or (Val2==top)       ->  top;
-    true ->  hipe_rtl_arch:eval_cond(RelOp, Val1, Val2)
-  end.
-
 %%-----------------------------------------------------------------------------
 %% Procedure : evaluate_fixnumop/2 
 %% Purpose   : try to evaluate a fixnumop.
@@ -408,6 +370,7 @@ partial_eval_branch(Cond, N0, Z0, V0, C0) ->
        Cond =:= 'ne'           -> {true, Z0,   true, true};
        Cond =:= 'gt';
        Cond =:= 'le'           -> {N0,   Z0,   V0,   true};
+       Cond =:= 'leu';
        Cond =:= 'gtu'          -> {true, Z0,   true, C0  };
        Cond =:= 'lt';
        Cond =:= 'ge'           -> {N0,   true, V0,   true};
@@ -450,7 +413,11 @@ visit_alub(Inst, Env) ->
           false  -> [hipe_rtl:alub_false_label(Inst)]
         end
      end,
-  {[], NewSSA, NewEnv} = set_to(hipe_rtl:alub_dst(Inst), NewVal,  Env),
+  {[], NewSSA, NewEnv} =
+    case hipe_rtl:alub_has_dst(Inst) of
+      false -> {[], [], Env};
+      true -> set_to(hipe_rtl:alub_dst(Inst), NewVal, Env)
+    end,
   {Labels, NewSSA, NewEnv}.
       
 %%-----------------------------------------------------------------------------
@@ -688,8 +655,6 @@ update_instruction(Inst, Env) ->
       update_alu(Inst, Env);
     #alub{} ->
       update_alub(Inst, Env);
-    #branch{} ->
-      update_branch(Inst, Env);
     #call{} ->
       subst_all_uses(Inst, Env);
 %%    #comment{} ->
@@ -902,33 +867,6 @@ update_alu(Inst, Env) ->
       {Val,_,_,_,_} = evaluate_alu(Val1, hipe_rtl:alu_op(Inst), Val2),
       [hipe_rtl:mk_move(hipe_rtl:alu_dst(Inst), hipe_rtl:mk_imm(Val))]
   end.
- 
-%%-----------------------------------------------------------------------------
-%% Procedure : update_branch/2
-%% Purpose   : update an branch-instruction
-%% Arguments : Inst - the instruction.
-%%             Env - in which everything happens.
-%% Returns   : list of new instruction
-%%-----------------------------------------------------------------------------
-
-update_branch(Inst, Env) ->
-  Src1 = hipe_rtl:branch_src1(Inst),
-  Src2 = hipe_rtl:branch_src2(Inst),
-  Val1 = lookup_lattice_value(Src1, Env),
-  Val2 = lookup_lattice_value(Src2, Env),
-  if
-    (Val1 =:= bottom) and (Val2 =:= bottom) ->
-      [Inst];
-    Val1 =:= bottom ->
-      [hipe_rtl:subst_uses([{Src2, hipe_rtl:mk_imm(Val2)}], Inst)];
-    Val2 =:= bottom -> 
-      [hipe_rtl:subst_uses([{Src1, hipe_rtl:mk_imm(Val1)}], Inst)];
-    true ->
-      case hipe_rtl_arch:eval_cond(hipe_rtl:branch_cond(Inst), Val1, Val2) of
-        true  -> [hipe_rtl:mk_goto(hipe_rtl:branch_true_label(Inst))];
-        false -> [hipe_rtl:mk_goto(hipe_rtl:branch_false_label(Inst))]
-      end
-  end.
 
 %%-----------------------------------------------------------------------------
 %% Procedure : update_alub/2
@@ -943,8 +881,12 @@ update_branch(Inst, Env) ->
 
 %% some small helpers.
 alub_to_move(Inst, Res, Lab) ->
-  [hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res),
-   hipe_rtl:mk_goto(Lab)].
+  Goto = [hipe_rtl:mk_goto(Lab)],
+  case hipe_rtl:alub_has_dst(Inst) of
+    false -> Goto;
+    true ->
+      [hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res) | Goto]
+  end.
 
 make_alub_subst_list(bottom, _, Tail) ->  Tail;
 make_alub_subst_list(top, Src, _) ->
diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl
index 8cf45772b5..8d9514ae82 100644
--- a/lib/hipe/rtl/hipe_tagscheme.erl
+++ b/lib/hipe/rtl/hipe_tagscheme.erl
@@ -171,14 +171,12 @@ test_nil(X, TrueLab, FalseLab, Pred) ->
   hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(?NIL), TrueLab, FalseLab, Pred).
 
 test_cons(X, TrueLab, FalseLab, Pred) ->
-  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
   Mask = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_LIST),
-  hipe_rtl:mk_alub(Tmp, X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred).
+  hipe_rtl:mk_branch(X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred).
 
 test_is_boxed(X, TrueLab, FalseLab, Pred) ->
-  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
   Mask = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_BOXED),
-  hipe_rtl:mk_alub(Tmp, X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred).
+  hipe_rtl:mk_branch(X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred).
 
 get_header(Res, X) ->
   hipe_rtl:mk_load(Res, X, hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED))).
@@ -238,13 +236,12 @@ test_atom(X, TrueLab, FalseLab, Pred) ->
 
 test_tuple(X, TrueLab, FalseLab, Pred) ->
   Tmp = hipe_rtl:mk_new_reg_gcsafe(),
-  Tmp2 = hipe_rtl:mk_new_reg_gcsafe(),
   HalfTrueLab = hipe_rtl:mk_new_label(),
   [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred),
    HalfTrueLab,
    get_header(Tmp, X),
-   hipe_rtl:mk_alub(Tmp2, Tmp, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
-		    TrueLab, FalseLab, Pred)].
+   hipe_rtl:mk_branch(Tmp, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
+		      TrueLab, FalseLab, Pred)].
 
 test_tuple_N(X, N, TrueLab, FalseLab, Pred) ->
   Tmp = hipe_rtl:mk_new_reg_gcsafe(),
@@ -687,7 +684,6 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
   IndexOkLab = hipe_rtl:mk_new_label(),
   Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple
   Header = hipe_rtl:mk_new_reg_gcsafe(),
-  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
   UIndex = hipe_rtl:mk_new_reg_gcsafe(),
   Arity = hipe_rtl:mk_new_reg_gcsafe(),
   InvIndex = hipe_rtl:mk_new_reg_gcsafe(),
@@ -700,9 +696,9 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
        BoxedOkLab,
        hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
        hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
-       hipe_rtl:mk_alub(Tmp, Header, 'and', 
-			hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
-			hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
+       hipe_rtl:mk_branch(Header, 'and',
+			  hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
+			  hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
        TupleOkLab,
        untag_fixnum(UIndex, Index),
        hipe_rtl:mk_alu(Arity, Header, 'srl',
@@ -716,9 +712,9 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
        BoxedOkLab,
        hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
        hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
-       hipe_rtl:mk_alub(Tmp, Header, 'and', 
-			hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
-			hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
+       hipe_rtl:mk_branch(Header, 'and',
+			  hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
+			  hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
        TupleOkLab,
        hipe_rtl:mk_alu(Arity, Header, 'srl', 
 		       hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
@@ -734,9 +730,9 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
        BoxedOkLab,
        hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
        hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
-       hipe_rtl:mk_alub(Tmp, Header, 'and', 
-			hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
-			hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
+       hipe_rtl:mk_branch(Header, 'and',
+			  hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
+			  hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
        TupleOkLab,
        untag_fixnum(UIndex, Index),
        hipe_rtl:mk_alu(Arity, Header, 'srl',
diff --git a/lib/hipe/sparc/hipe_rtl_to_sparc.erl b/lib/hipe/sparc/hipe_rtl_to_sparc.erl
index e170fec3d6..7fab0d95c7 100644
--- a/lib/hipe/sparc/hipe_rtl_to_sparc.erl
+++ b/lib/hipe/sparc/hipe_rtl_to_sparc.erl
@@ -63,7 +63,6 @@ conv_insn(I, Map, Data) ->
   case I of
     #alu{} -> conv_alu(I, Map, Data);
     #alub{} -> conv_alub(I, Map, Data);
-    #branch{} -> conv_branch(I, Map, Data);
     #call{} -> conv_call(I, Map, Data);
     #comment{} -> conv_comment(I, Map, Data);
     #enter{} -> conv_enter(I, Map, Data);
@@ -281,7 +280,12 @@ mk_alu_rs(XAluOp, Src1, Src2, Dst) ->
 
 conv_alub(I, Map, Data) ->
   %% dst = src1 aluop src2; if COND goto label
-  {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
+  HasDst = hipe_rtl:alub_has_dst(I),
+  {Dst, Map0} =
+    case HasDst of
+      false -> {hipe_sparc:mk_g0(), Map};
+      true -> conv_dst(hipe_rtl:alub_dst(I), Map)
+    end,
   {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
   {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
   Cond = conv_cond(hipe_rtl:alub_cond(I)),
@@ -307,67 +311,33 @@ conv_alub(I, Map, Data) ->
 	I1 ++
 	[hipe_sparc:mk_rdy(TmpHi),
 	 hipe_sparc:mk_alu('sra', Dst, hipe_sparc:mk_uimm5(31), TmpSign) |
-	 conv_alub2(G0, TmpSign, 'sub', NewCond, TmpHi, I)];
+	 conv_alub2(G0, TmpSign, 'cmpcc', NewCond, TmpHi, I)];
       _ ->
-	conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I)
+	XAluOp =
+	  case (not HasDst) andalso RtlAlubOp =:= 'sub' of
+	    true -> 'cmpcc'; % == a subcc that commutes
+	    false -> conv_alubop_cc(RtlAlubOp)
+	  end,
+	conv_alub2(Dst, Src1, XAluOp, Cond, Src2, I)
     end,
   {I2, Map2, Data}.
 
--ifdef(notdef).	% XXX: only for sparc64, alas
-conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) ->
-  case conv_cond_rcond(Cond) of
-    [] ->
-      conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I);
-    RCond ->
-      conv_alub_br(Dst, Src1, RtlAlubOp, RCond, Src2, I)
-  end.
+conv_alub2(Dst, Src1, XAluOp, Cond, Src2, I) -> % plain delegation; XAluOp is already converted by the caller
+  conv_alub_bp(Dst, Src1, XAluOp, Cond, Src2, I).
 
-conv_alub_br(Dst, Src1, RtlAlubOp, RCond, Src2, I) ->
-  TrueLab = hipe_rtl:alub_true_label(I),
-  FalseLab = hipe_rtl:alub_false_label(I),
-  Pred = hipe_rtl:alub_pred(I),
-  %% "Dst = Src1 AluOp Src2; if COND" becomes
-  %% "Dst = Src1 AluOp Src2; if-COND(Dst)"
-  {I2, _DidCommute} = mk_alu(conv_alubop_nocc(RtlAlubOp), Src1, Src2, Dst),
-  I2 ++ mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred).
-
-conv_cond_rcond(Cond) ->
-  case Cond of
-    'e'  -> 'z';
-    'ne' -> 'nz';
-    'g'  -> 'gz';
-    'ge' -> 'gez';
-    'l'  -> 'lz';
-    'le' -> 'lez';
-    _	 -> []	% vs, vc, gu, geu, lu, leu
-  end.
-
-conv_alubop_nocc(RtlAlubOp) ->
-  case RtlAlubOp of
-    'add' -> 'add';
-    'sub' -> 'sub';
-    %% mul: handled elsewhere
-    'or' -> 'or';
-    'and' -> 'and';
-    'xor' -> 'xor'
-    %% no shift ops
-  end.
-
-mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred) ->
-  [hipe_sparc:mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred)].
--else.
-conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) ->
-  conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I).
--endif.
-
-conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I) ->
+conv_alub_bp(Dst, Src1, XAluOp, Cond, Src2, I) ->
   TrueLab = hipe_rtl:alub_true_label(I),
   FalseLab = hipe_rtl:alub_false_label(I),
   Pred = hipe_rtl:alub_pred(I),
   %% "Dst = Src1 AluOp Src2; if COND" becomes
   %% "Dst = Src1 AluOpCC Src22; if-COND(CC)"
-  {I2, _DidCommute} = mk_alu(conv_alubop_cc(RtlAlubOp), Src1, Src2, Dst),
-  I2 ++ mk_pseudo_bp(Cond, TrueLab, FalseLab, Pred).
+  {I2, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst),
+  NewCond =
+    case DidCommute andalso XAluOp =:= 'cmpcc' of
+      true -> commute_cond(Cond); % subcc does not commute; its conditions do
+      false -> Cond
+    end,
+  I2 ++ mk_pseudo_bp(NewCond, TrueLab, FalseLab, Pred).
 
 conv_alubop_cc(RtlAlubOp) ->
   case RtlAlubOp of
@@ -380,69 +350,6 @@ conv_alubop_cc(RtlAlubOp) ->
     %% no shift ops
   end.
 
-conv_branch(I, Map, Data) ->
-  %% <unused> = src1 - src2; if COND goto label
-  {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
-  {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
-  Cond = conv_cond(hipe_rtl:branch_cond(I)),
-  I2 = conv_branch2(Src1, Cond, Src2, I),
-  {I2, Map1, Data}.
-
--ifdef(notdef).	% XXX: only for sparc64, alas
-conv_branch2(Src1, Cond, Src2, I) ->
-  case conv_cond_rcond(Cond) of
-    [] ->
-      conv_branch_bp(Src1, Cond, Src2, I);
-    RCond ->
-      conv_branch_br(Src1, RCond, Src2, I)
-  end.
-
-conv_branch_br(Src1, RCond, Src2, I) ->
-  TrueLab = hipe_rtl:branch_true_label(I),
-  FalseLab = hipe_rtl:branch_false_label(I),
-  Pred = hipe_rtl:branch_pred(I),
-  %% "if src1-COND-src2" becomes
-  %% "sub src1,src2,tmp; if-COND(tmp)"
-  Dst = hipe_sparc:mk_new_temp('untagged'),
-  XAluOp = 'cmp',	% == a sub that commutes
-  {I1, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst),
-  NewRCond =
-    case DidCommute of
-      true -> commute_rcond(RCond);
-      false -> RCond
-    end,
-  I1 ++ mk_pseudo_br(NewRCond, Dst, TrueLab, FalseLab, Pred).
-
-commute_rcond(RCond) ->	% if x RCond y, then y commute_rcond(RCond) x
-  case RCond of
-    'z'   -> 'z';	% ==, ==
-    'nz'  -> 'nz';	% !=, !=
-    'gz'  -> 'lz';	% >, <
-    'gez' -> 'lez';	% >=, <=
-    'lz'  -> 'gz';	% <, >
-    'lez' -> 'gez'	% <=, >=
-  end.
--else.
-conv_branch2(Src1, Cond, Src2, I) ->
-  conv_branch_bp(Src1, Cond, Src2, I).
--endif.
-
-conv_branch_bp(Src1, Cond, Src2, I) ->
-  TrueLab = hipe_rtl:branch_true_label(I),
-  FalseLab = hipe_rtl:branch_false_label(I),
-  Pred = hipe_rtl:branch_pred(I),
-  %% "if src1-COND-src2" becomes
-  %% "subcc src1,src2,%g0; if-COND(CC)"
-  Dst = hipe_sparc:mk_g0(),
-  XAluOp = 'cmpcc',	% == a subcc that commutes
-  {I1, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst),
-  NewCond =
-    case DidCommute of
-      true -> commute_cond(Cond);
-      false -> Cond
-    end,
-  I1 ++ mk_pseudo_bp(NewCond, TrueLab, FalseLab, Pred).
-
 conv_call(I, Map, Data) ->
   {Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map),
   {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0),
diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl
index 4c8c98551c..ccb9b7632b 100644
--- a/lib/hipe/x86/hipe_rtl_to_x86.erl
+++ b/lib/hipe/x86/hipe_rtl_to_x86.erl
@@ -91,26 +91,31 @@ conv_insn(I, Map, Data) ->
     #alub{} ->
       %% dst = src1 op src2; if COND goto label
       BinOp = conv_binop(hipe_rtl:alub_op(I)),
-      {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
-      {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
-      {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
+      {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map),
+      {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0),
       Cc = conv_cond(hipe_rtl:alub_cond(I)),
-      I1 = [hipe_x86:mk_pseudo_jcc(Cc,
-				   hipe_rtl:alub_true_label(I),
-				   hipe_rtl:alub_false_label(I),
-				   hipe_rtl:alub_pred(I))],
-      I2 = conv_alu(Dst, Src1, BinOp, Src2, I1),
-      {FixSrc1++FixSrc2++I2, Map2, Data};
-    #branch{} ->
-      %% <unused> = src1 - src2; if COND goto label
-      {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
-      {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
-      Cc = conv_cond(hipe_rtl:branch_cond(I)),
-      I2 = conv_branch(Src1, Cc, Src2,
-		       hipe_rtl:branch_true_label(I),
-		       hipe_rtl:branch_false_label(I),
-		       hipe_rtl:branch_pred(I)),
-      {FixSrc1++FixSrc2++I2, Map1, Data};
+      BranchOp = conv_branchop(BinOp),
+      HasDst = hipe_rtl:alub_has_dst(I),
+      {I2, Map3} =
+	case (not HasDst) andalso BranchOp =/= none of
+	  true ->
+	    {conv_branch(Src1, BranchOp, Src2, Cc,
+			 hipe_rtl:alub_true_label(I),
+			 hipe_rtl:alub_false_label(I),
+			 hipe_rtl:alub_pred(I)), Map1};
+	  false ->
+	    {Dst, Map2} =
+	      case HasDst of
+		false -> {new_untagged_temp(), Map1};
+		true -> conv_dst(hipe_rtl:alub_dst(I), Map1)
+	      end,
+	    I1 = [hipe_x86:mk_pseudo_jcc(Cc,
+					 hipe_rtl:alub_true_label(I),
+					 hipe_rtl:alub_false_label(I),
+					 hipe_rtl:alub_pred(I))],
+	    {conv_alu(Dst, Src1, BinOp, Src2, I1), Map2}
+	end,
+      {FixSrc1++FixSrc2++I2, Map3, Data};
     #call{} ->
       %%	push <arg1>
       %%	...
@@ -360,28 +365,41 @@ conv_shift(Dst, Src1, BinOp, Src2) ->
 %%% Finalise the conversion of a conditional branch operation, taking
 %%% care to not introduce more temps and moves than necessary.
 
-conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
+conv_branchop('sub') -> 'cmp'; % emitted through hipe_x86:mk_cmp/2 by mk_branchtest/3
+conv_branchop('and') ->  'test'; % emitted through hipe_x86:mk_test/2 by mk_branchtest/3
+conv_branchop(_) -> none. % no flag-only form: conv_insn falls back to conv_alu with a dst
+
+branchop_commutes('cmp') -> false; % conv_branch applies commute_cc/1 when it swaps the operands
+branchop_commutes('test') -> true. % conv_branch keeps Cc unchanged when it swaps the operands
+
+conv_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) ->
   case hipe_x86:is_imm(Src1) of
     false ->
-      mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred);
+      mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred);
     true ->
       case hipe_x86:is_imm(Src2) of
 	false ->
-	  NewCc = commute_cc(Cc),
-	  mk_branch(Src2, NewCc, Src1, TrueLab, FalseLab, Pred);
+	  NewCc = case branchop_commutes(Op) of
+		    true -> Cc;
+		    false -> commute_cc(Cc)
+		  end,
+	  mk_branch(Src2, Op, Src1, NewCc, TrueLab, FalseLab, Pred);
 	true ->
 	  %% two immediates, let the optimiser clean it up
 	  Tmp = new_untagged_temp(),
 	  [hipe_x86:mk_move(Src1, Tmp) |
-	   mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)]
+	   mk_branch(Tmp, Op, Src2, Cc, TrueLab, FalseLab, Pred)]
       end
   end.
 
-mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
+mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) ->
   %% PRE: not(is_imm(Src1))
-  [hipe_x86:mk_cmp(Src2, Src1),
+  [mk_branchtest(Src1, Op, Src2),
    hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)].
 
+mk_branchtest(Src1, cmp, Src2) -> hipe_x86:mk_cmp(Src2, Src1); % Src2 fills the src field, Src1 the dst field
+mk_branchtest(Src1, test, Src2) -> hipe_x86:mk_test(Src2, Src1). % same operand order as for cmp
+
 %%% Convert an RTL ALU or ALUB binary operator.
 
 conv_binop(BinOp) ->
diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl
index 33d7f77cf1..e7c4497cda 100644
--- a/lib/hipe/x86/hipe_x86.erl
+++ b/lib/hipe/x86/hipe_x86.erl
@@ -201,7 +201,7 @@
 	 shift_src/1,
 	 shift_dst/1,
 
-	 %% mk_test/2,
+	 mk_test/2,
 	 test_src/1,
 	 test_dst/1,
 
@@ -305,7 +305,7 @@ mk_cmp(Src, Dst) -> #cmp{src=Src, dst=Dst}.
 cmp_src(#cmp{src=Src}) -> Src.
 cmp_dst(#cmp{dst=Dst}) -> Dst.
 
-%% mk_test(Src, Dst) -> #test{src=Src, dst=Dst}.
+mk_test(Src, Dst) -> #test{src=Src, dst=Dst}. % constructor for the #test{} record
 test_src(#test{src=Src}) -> Src.
 test_dst(#test{dst=Dst}) -> Dst.
 
-- 
cgit v1.2.3


From c91c523f1ca58644267402325b6fb59d65049829 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Wed, 9 Nov 2016 18:47:12 +0100
Subject: hipe_rtl: drop alub dest when unused

---
 lib/hipe/icode/hipe_icode.erl | 13 +++++++++++++
 lib/hipe/rtl/hipe_rtl.erl     | 19 +++++++++++++++++++
 lib/hipe/ssa/hipe_ssa.inc     | 22 +++++++++++-----------
 3 files changed, 43 insertions(+), 11 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/icode/hipe_icode.erl b/lib/hipe/icode/hipe_icode.erl
index 78508dff22..dce2fcb392 100644
--- a/lib/hipe/icode/hipe_icode.erl
+++ b/lib/hipe/icode/hipe_icode.erl
@@ -594,6 +594,7 @@
 	 uses/1,
 	 defines/1,
 	 is_safe/1,
+	 reduce_unused/1,
 	 strip_comments/1,
 	 subst/2,
 	 subst_uses/2,
@@ -1765,6 +1766,18 @@ is_safe(Instr) ->
     #icode_end_try{} -> false
   end.
 
+%% @doc Produces a simplified instruction sequence that is equivalent to [Instr]
+%% under the assumption that all results of Instr are unused, or 'false' if
+%% there is no such sequence (other than [Instr] itself).
+
+-spec reduce_unused(icode_instr()) -> false | [icode_instr()].
+
+reduce_unused(Instr) ->
+  case is_safe(Instr) of
+    true -> []; % safe and unused: the empty sequence, i.e. Instr can be dropped
+    false -> false % no simplification is offered for unsafe instructions
+  end.
+
 %%-----------------------------------------------------------------------
 
 -spec highest_var(icode_instrs()) -> non_neg_integer().
diff --git a/lib/hipe/rtl/hipe_rtl.erl b/lib/hipe/rtl/hipe_rtl.erl
index e4e857ec43..d39969a0ed 100644
--- a/lib/hipe/rtl/hipe_rtl.erl
+++ b/lib/hipe/rtl/hipe_rtl.erl
@@ -334,6 +334,7 @@
 	 defines/1,
 	 redirect_jmp/3,
 	 is_safe/1,
+	 reduce_unused/1,
 	 %% highest_var/1,
 	 pp/1,
 	 pp/2,
@@ -1314,6 +1315,24 @@ is_safe(Instr) ->
     #switch{} -> false %% Maybe this is safe...
   end.
 
+%% @spec reduce_unused(rtl_instruction())
+%%           -> false | [rtl_instruction()].
+%%
+%% @doc Produces a simplified instruction sequence that is equivalent to [Instr]
+%% under the assumption that all results of Instr are unused, or 'false' if
+%% there is no such sequence (other than [Instr] itself).
+
+reduce_unused(Instr) ->
+  case Instr of
+    #alub{dst=Dst} when Dst =/= [] -> % keep the branch itself, drop only the unused destination
+      [Instr#alub{dst=[]}];
+    _ ->
+      case is_safe(Instr) of
+	true -> []; % safe and unused: the empty sequence, i.e. Instr can be dropped
+	false -> false % nothing simpler than [Instr] itself
+      end
+  end.
+
 %%
 %% True if argument is an alu-operator
 %%
diff --git a/lib/hipe/ssa/hipe_ssa.inc b/lib/hipe/ssa/hipe_ssa.inc
index 83ab320306..b511bb6f25 100644
--- a/lib/hipe/ssa/hipe_ssa.inc
+++ b/lib/hipe/ssa/hipe_ssa.inc
@@ -1,4 +1,4 @@
-%% -*- erlang-indent-level: 2 -*-
+%% -*- mode: erlang; erlang-indent-level: 2 -*-
 %%
 %% %CopyrightBegin%
 %% 
@@ -943,9 +943,9 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) ->
     false ->
       do_code(Instrs, LiveIn, Changed, [Instr|Acc]);
     true ->
-      case ?CODE:is_safe(Instr) of
+      case ?CODE:is_call(Instr) of
 	true ->
-	  case ?CODE:is_call(Instr) of
+	  case ?CODE:is_safe(Instr) of
 	    true ->
 	      case ?CODE:call_continuation(Instr) of
 		[] ->
@@ -955,11 +955,6 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) ->
 		  do_code(Instrs, LiveOut, true, [NewInstr|Acc])
 	      end;
 	    false ->
-	      do_code(Instrs, LiveOut, true, Acc)
-	  end;
-	false -> %% not a safe instruction - cannot be removed
-	  case ?CODE:is_call(Instr) of
-	    true ->
 	      case ?CODE:call_dstlist(Instr) of
 	        [] ->  %% result was not used anyway; no change
 		  do_code(Instrs, LiveIn, Changed, [Instr|Acc]);
@@ -968,9 +963,14 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) ->
 		  do_code(Instrs, LiveIn, true, [NewInstr|Acc]);
 		[_|_] ->  %% calls with multiple dests are left untouched
 		  do_code(Instrs, LiveIn, Changed, [Instr|Acc])
-	      end;
-	    false ->
-	      do_code(Instrs, LiveIn, Changed, [Instr|Acc])
+	      end
+	  end;
+	false ->
+	  case ?CODE:reduce_unused(Instr) of
+	    false -> % not a safe instruction - cannot be removed
+	      do_code(Instrs, LiveIn, Changed, [Instr|Acc]);
+	    Replacement ->
+	      do_code(lists:reverse(Replacement, Instrs), LiveOut, true, Acc)
 	  end
       end
   end;
-- 
cgit v1.2.3


From 5ae8d51f97600116f47f0160b4eac331acf5b7ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Thu, 10 Nov 2016 12:20:21 +0100
Subject: hipe_ppc: move coalescing

---
 lib/hipe/ppc/hipe_ppc_frame.erl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl
index 8d37159ad8..58924409a8 100644
--- a/lib/hipe/ppc/hipe_ppc_frame.erl
+++ b/lib/hipe/ppc/hipe_ppc_frame.erl
@@ -98,7 +98,10 @@ do_pseudo_move(I, Context, FPoff) ->
 	  Offset = pseudo_offset(Src, FPoff, Context),
 	  mk_load(hipe_ppc:ldop_word(), Dst, Offset, mk_sp(), []);
 	_ ->
-	  [hipe_ppc:mk_alu('or', Dst, Src, Src)]
+	  case hipe_ppc:temp_reg(Dst) =:= hipe_ppc:temp_reg(Src) of
+	    true -> [];
+	    false -> [hipe_ppc:mk_alu('or', Dst, Src, Src)]
+	  end
       end
   end.
 
-- 
cgit v1.2.3


From 8cf7ad0fd02d5fd4100e23e61e1caa56726a0494 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Thu, 10 Nov 2016 12:20:55 +0100
Subject: hipe_ppc: better rlwinm pp

---
 lib/hipe/ppc/hipe_ppc_pp.erl | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'lib')

diff --git a/lib/hipe/ppc/hipe_ppc_pp.erl b/lib/hipe/ppc/hipe_ppc_pp.erl
index e69e6b64a2..0ff7a76bce 100644
--- a/lib/hipe/ppc/hipe_ppc_pp.erl
+++ b/lib/hipe/ppc/hipe_ppc_pp.erl
@@ -170,6 +170,12 @@ pp_insn(Dev, I, Pre) ->
       io:format(Dev, ", ", []),
       pp_temp(Dev, Base2),
       io:format(Dev, "\n", []);
+    #unary{unop={UnOp,I1,I2,I3}, dst=Dst, src=Src} ->
+      io:format(Dev, "\t~s ", [UnOp]),
+      pp_temp(Dev, Dst),
+      io:format(Dev, ", ", []),
+      pp_temp(Dev, Src),
+      io:format(Dev, ", ~s, ~s, ~s\n", [to_hex(I1),to_hex(I2),to_hex(I3)]);
     #unary{unop=UnOp, dst=Dst, src=Src} ->
       io:format(Dev, "\t~w ", [unop_name(UnOp)]),
       pp_temp(Dev, Dst),
-- 
cgit v1.2.3


From 35f74834dc4130c613fea2a5483ba02ed43af2c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Thu, 10 Nov 2016 12:25:49 +0100
Subject: hipe_sparc: move coalescing

---
 lib/hipe/sparc/hipe_sparc_frame.erl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/hipe/sparc/hipe_sparc_frame.erl b/lib/hipe/sparc/hipe_sparc_frame.erl
index 37f29e660a..bd94d3318c 100644
--- a/lib/hipe/sparc/hipe_sparc_frame.erl
+++ b/lib/hipe/sparc/hipe_sparc_frame.erl
@@ -110,7 +110,10 @@ do_pseudo_move(I, Context, FPoff) ->
 	  Offset = pseudo_offset(Src, FPoff, Context),
 	  mk_load(hipe_sparc:mk_sp(), Offset, Dst, []);
 	_ ->
-	  [hipe_sparc:mk_mov(Src, Dst)]
+	  case hipe_sparc:temp_reg(Dst) =:= hipe_sparc:temp_reg(Src) of
+	    true -> [];
+	    false -> [hipe_sparc:mk_mov(Src, Dst)]
+	  end
       end
   end.
 
-- 
cgit v1.2.3


From df04801d5585f52ddc042d30873bdc95da019af6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Sat, 24 Sep 2016 09:42:01 +0200
Subject: hipe_x86: LeaToAdd peephole rule

Although LEA is useful for three-address form adds, sometimes it is used
where a normal add would have sufficed (due to the addition being the
last use of one of the operands; but RTL lowering does not know that as
it does not have liveness information). As a workaround, we convert LEA
back to ADD when the destination is the same as one of the operands.
---
 lib/hipe/x86/hipe_x86_postpass.erl | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'lib')

diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl
index bd232041b6..390f5bf5e7 100644
--- a/lib/hipe/x86/hipe_x86_postpass.erl
+++ b/lib/hipe/x86/hipe_x86_postpass.erl
@@ -183,6 +183,18 @@ peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) ->
 	    peep(Insns, [B|Res], Lst)
     end;
 
+%% LeaToAdd
+%% This rule transforms lea into add when the destination is the same as one of
+%% the operands. Sound because lea is never used where the condition codes are
+%% live (and would be clobbered by add).
+%% ----------
+peep([#lea{mem=#x86_mem{base=#x86_temp{reg=DstR},off=Src},
+	   temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) ->
+     peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]);
+peep([#lea{mem=#x86_mem{base=Src,off=#x86_temp{reg=DstR}},
+	   temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) ->
+     peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]);
+
 %% SubToDec
 %% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which
 %% changes reduction counter tests to use decl instead of subl.
-- 
cgit v1.2.3


From 921638f8b22479473482bdcaa25f8031ac85e7e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Sun, 2 Oct 2016 13:18:51 +0200
Subject: hipe_rtl_to_x86: Use LEA only for immediate adds

It seems that most 3-address adds of temps can be move coalesced.
Therefore, we limit the behaviour added by 1567585dda8 to only affect
immediate adds.

Also, add conversion of immediate mov+sub to lea.
---
 lib/hipe/x86/hipe_rtl_to_x86.erl | 14 +++++++++++++-
 lib/hipe/x86/hipe_x86.erl        |  4 ++--
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl
index ccb9b7632b..851b7da2dd 100644
--- a/lib/hipe/x86/hipe_rtl_to_x86.erl
+++ b/lib/hipe/x86/hipe_rtl_to_x86.erl
@@ -257,7 +257,9 @@ conv_insn(I, Map, Data) ->
 
 conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) ->
   case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2))
-    andalso (hipe_x86:is_temp(Src1) orelse hipe_x86:is_temp(Src2))
+    %% We could use orelse instead of xor here to generate lea T1(T2), T3, but
+    %% they seem to move coalesce so well that move+add is better for them.
+    andalso (hipe_x86:is_temp(Src1) xor hipe_x86:is_temp(Src2))
   of
     false -> conv_alu(Dst, Src1, 'add', Src2, Tail);
     true -> % Use LEA
@@ -268,6 +270,16 @@ conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) ->
 	    end,
       [hipe_x86:mk_lea(Mem, Dst) | Tail]
   end;
+conv_alu_nocc(Dst, Src1, 'sub', Src2, Tail) ->
+  case (not same_opnd(Dst, Src1)) andalso hipe_x86:is_temp(Src1)
+    andalso (not hipe_x86:is_temp(Src2))
+  of
+    false -> conv_alu(Dst, Src1, 'sub', Src2, Tail);
+    true -> % Use LEA
+      Imm = hipe_x86:mk_imm(-hipe_x86:imm_value(Src2)),
+      Mem = hipe_x86:mk_mem(Src1, Imm, typeof_dst(Dst)),
+      [hipe_x86:mk_lea(Mem, Dst) | Tail]
+  end;
 conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) ->
   conv_alu(Dst, Src1, BinOp, Src2, Tail).
 
diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl
index e7c4497cda..95af3f9c67 100644
--- a/lib/hipe/x86/hipe_x86.erl
+++ b/lib/hipe/x86/hipe_x86.erl
@@ -37,7 +37,7 @@
 	 mk_imm_from_addr/2,
 	 mk_imm_from_atom/1,
 	 is_imm/1,
-	 %% imm_value/1,
+	 imm_value/1,
 
 	 mk_mem/3,
 	 %% is_mem/1,
@@ -241,7 +241,7 @@ mk_imm_from_addr(Addr, Type) ->
 mk_imm_from_atom(Atom) ->
     mk_imm(Atom).
 is_imm(X) -> case X of #x86_imm{} -> true; _ -> false end.
-%% imm_value(#x86_imm{value=Value}) -> Value.
+imm_value(#x86_imm{value=Value}) -> Value.
 
 mk_mem(Base, Off, Type) -> #x86_mem{base=Base, off=Off, type=Type}.
 %% is_mem(X) -> case X of #x86_mem{} -> true; _ -> false end.
-- 
cgit v1.2.3


From fda8d7909594d434c01fdc9610127d09d1499a43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Sun, 2 Oct 2016 13:23:52 +0200
Subject: hipe_tagscheme: Improve fixnum add on x86

With the introduction of immediate adds encoded as 'LEA' on x86, it is
now possible to do a fixnum add in two instructions and one branch by
commuting the addition and reusing the result register as a temporary,
which makes the 'alub' a 2-address add, saving a move instruction.
---
 lib/hipe/rtl/hipe_tagscheme.erl | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl
index 8d9514ae82..305aacb571 100644
--- a/lib/hipe/rtl/hipe_tagscheme.erl
+++ b/lib/hipe/rtl/hipe_tagscheme.erl
@@ -510,21 +510,26 @@ unsafe_fixnum_sub(Arg1, Arg2, Res) ->
 %%% (16X+tag)+((16Y+tag)-tag) = 16X+tag+16Y = 16(X+Y)+tag
 %%% (16X+tag)-((16Y+tag)-tag) = 16X+tag-16Y = 16(X-Y)+tag
 fixnum_addsub(AluOp, Arg1, Arg2, Res, OtherLab) ->
-  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
+  NoOverflowLab = hipe_rtl:mk_new_label(),
   %% XXX: Consider moving this test to the users of fixnum_addsub.
   case Arg1 =/= Res andalso Arg2 =/= Res of 
     true -> 
       %% Args differ from res.
-      NoOverflowLab = hipe_rtl:mk_new_label(),
-      [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
-       hipe_rtl:mk_alub(Res, Arg1, AluOp, Tmp, not_overflow,
+      %% Commute add to save a move on x86
+      {UntagFirst, Lhs, Rhs} =
+	case AluOp of
+	  'add' -> {Arg1, Res, Arg2};
+	  'sub' -> {Arg2, Arg1, Res}
+	end,
+      [hipe_rtl:mk_alu(Res, UntagFirst, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
+       hipe_rtl:mk_alub(Res, Lhs, AluOp, Rhs, not_overflow,
 			hipe_rtl:label_name(NoOverflowLab), 
 			hipe_rtl:label_name(OtherLab), 0.99),
        NoOverflowLab];
     false ->
       %% At least one of the arguments is the same as Res.
+      Tmp = hipe_rtl:mk_new_reg_gcsafe(),
       Tmp2 = hipe_rtl:mk_new_var(), % XXX: shouldn't this var be a reg?
-      NoOverflowLab = hipe_rtl:mk_new_label(),
       [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
        hipe_rtl:mk_alub(Tmp2, Arg1, AluOp, Tmp, not_overflow,
 			hipe_rtl:label_name(NoOverflowLab), 
-- 
cgit v1.2.3


From 6b7c73196936d94f2a03a465c0954fc9dbf35974 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Sun, 2 Oct 2016 14:20:41 +0200
Subject: hipe_x86_postpass: Negate conditions in goto elim

---
 lib/hipe/x86/hipe_x86.erl          | 4 ++++
 lib/hipe/x86/hipe_x86_postpass.erl | 5 +++++
 2 files changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl
index 95af3f9c67..45bf1ad736 100644
--- a/lib/hipe/x86/hipe_x86.erl
+++ b/lib/hipe/x86/hipe_x86.erl
@@ -218,6 +218,10 @@
 	 %% highest_temp/1
 	]).
 
+%% Other utilities
+-export([neg_cc/1
+	]).
+
 %%%
 %%% Low-level accessors.
 %%%
diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl
index 390f5bf5e7..f88a841cca 100644
--- a/lib/hipe/x86/hipe_x86_postpass.erl
+++ b/lib/hipe/x86/hipe_x86_postpass.erl
@@ -217,6 +217,11 @@ trivial_goto_elimination(Insns) -> goto_elim(Insns, []).
 
 goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) ->
   goto_elim([I|Insns], Res);
+goto_elim([#jcc{cc=CC, label=Label} = IJCC,
+	   #jmp_label{label=BranchTgt},
+	   #label{label=Label} = ILBL|Insns], Res) ->
+  goto_elim([IJCC#jcc{cc=hipe_x86:neg_cc(CC), label=BranchTgt},
+	     ILBL|Insns], Res);
 goto_elim([I | Insns], Res) ->
   goto_elim(Insns, [I|Res]);
 goto_elim([], Res) ->
-- 
cgit v1.2.3


From e1b9f467fbdb09aac9871e6064b67f87c8e59a47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Mon, 10 Oct 2016 15:32:18 +0200
Subject: hipe: Make realloc_binary fast case true branch

This makes the fast case a fallthrough and the slow case a branch,
hopefully improving cache locality.
---
 lib/hipe/rtl/hipe_rtl_binary_construct.erl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/rtl/hipe_rtl_binary_construct.erl b/lib/hipe/rtl/hipe_rtl_binary_construct.erl
index 2922972085..baf5f7d27a 100644
--- a/lib/hipe/rtl/hipe_rtl_binary_construct.erl
+++ b/lib/hipe/rtl/hipe_rtl_binary_construct.erl
@@ -429,8 +429,8 @@ realloc_binary(SizeReg, ProcBin, Base) ->
    hipe_tagscheme:set_field_from_term(ProcBinFlagsTag, ProcBin, Flags),
    hipe_tagscheme:get_field_from_term(ProcBinValTag, ProcBin, BinPointer),
    hipe_tagscheme:get_field_from_pointer(BinOrigSizeTag, BinPointer, OrigSize),
-   hipe_rtl:mk_branch(OrigSize, 'ltu', ResultingSize,
-		      ReallocLblName, NoReallocLblName),
+   hipe_rtl:mk_branch(OrigSize, 'geu', ResultingSize, NoReallocLblName,
+		      ReallocLblName),
    NoReallocLbl,
    hipe_tagscheme:get_field_from_term(ProcBinBytesTag, ProcBin, Base),
    hipe_rtl:mk_goto(ContLblName),
-- 
cgit v1.2.3


From e98df8fac977350b56319df621c65a823bfe86f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Mon, 10 Oct 2016 15:38:39 +0200
Subject: hipe_tagscheme: Improve fixnum_addsub with imm

The addsub sequence was suboptimal when one of the arguments was
immediate, because it became an immediate alu followed by an immediate
alub, and the optimisers would not combine them due to the risk of
altering the branch. However, in this case we know that such a rewrite
is safe, and do it directly in hipe_tagscheme:fixnum_addsub/5 instead.
---
 lib/hipe/rtl/hipe_tagscheme.erl | 49 +++++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 17 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl
index 305aacb571..1d9861da7a 100644
--- a/lib/hipe/rtl/hipe_tagscheme.erl
+++ b/lib/hipe/rtl/hipe_tagscheme.erl
@@ -509,33 +509,48 @@ unsafe_fixnum_sub(Arg1, Arg2, Res) ->
 
 %%% (16X+tag)+((16Y+tag)-tag) = 16X+tag+16Y = 16(X+Y)+tag
 %%% (16X+tag)-((16Y+tag)-tag) = 16X+tag-16Y = 16(X-Y)+tag
-fixnum_addsub(AluOp, Arg1, Arg2, Res, OtherLab) ->
+fixnum_addsub(AluOp, Arg1, Arg2, FinalRes, OtherLab) ->
   NoOverflowLab = hipe_rtl:mk_new_label(),
   %% XXX: Consider moving this test to the users of fixnum_addsub.
-  case Arg1 =/= Res andalso Arg2 =/= Res of 
-    true -> 
-      %% Args differ from res.
+  {Res, Tail} =
+    case Arg1 =/= FinalRes andalso Arg2 =/= FinalRes of
+      true ->
+	%% Args differ from res.
+	{FinalRes, [NoOverflowLab]};
+      false ->
+	%% At least one of the arguments is the same as FinalRes.
+	Tmp = hipe_rtl:mk_new_reg_gcsafe(),
+	{Tmp, [NoOverflowLab, hipe_rtl:mk_move(FinalRes, Tmp)]}
+    end,
+  case (hipe_rtl:is_imm(Arg1) andalso AluOp =:= 'add')
+    orelse hipe_rtl:is_imm(Arg2)
+  of
+    true ->
+      %% Pre-compute the untagged immediate. The optimisers won't do this for us
+      %% since they don't know that the untag never underflows.
+      {Var, Imm0} =
+	case hipe_rtl:is_imm(Arg2) of
+	  true  -> {Arg1, Arg2};
+	  false -> {Arg2, Arg1}
+	end,
+      Imm = hipe_rtl:mk_imm(hipe_rtl:imm_value(Imm0) - ?TAG_IMMED1_SMALL),
+      [hipe_rtl:mk_alub(Res, Var, AluOp, Imm, not_overflow,
+			hipe_rtl:label_name(NoOverflowLab),
+			hipe_rtl:label_name(OtherLab), 0.99)
+       |Tail];
+    false ->
       %% Commute add to save a move on x86
       {UntagFirst, Lhs, Rhs} =
 	case AluOp of
 	  'add' -> {Arg1, Res, Arg2};
 	  'sub' -> {Arg2, Arg1, Res}
 	end,
-      [hipe_rtl:mk_alu(Res, UntagFirst, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
+      [hipe_rtl:mk_alu(Res, UntagFirst, sub,
+		       hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
        hipe_rtl:mk_alub(Res, Lhs, AluOp, Rhs, not_overflow,
 			hipe_rtl:label_name(NoOverflowLab), 
-			hipe_rtl:label_name(OtherLab), 0.99),
-       NoOverflowLab];
-    false ->
-      %% At least one of the arguments is the same as Res.
-      Tmp = hipe_rtl:mk_new_reg_gcsafe(),
-      Tmp2 = hipe_rtl:mk_new_var(), % XXX: shouldn't this var be a reg?
-      [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
-       hipe_rtl:mk_alub(Tmp2, Arg1, AluOp, Tmp, not_overflow,
-			hipe_rtl:label_name(NoOverflowLab), 
-			hipe_rtl:label_name(OtherLab), 0.99),
-       NoOverflowLab,
-       hipe_rtl:mk_move(Res, Tmp2)]
+			hipe_rtl:label_name(OtherLab), 0.99)
+       |Tail]
   end.
 
 %%% ((16X+tag) div 16) * ((16Y+tag)-tag) + tag = X*16Y+tag = 16(XY)+tag
-- 
cgit v1.2.3


From 40b7dc3ce63999a6e8d40c20a098de6d85676aeb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Sun, 13 Nov 2016 11:12:24 +0100
Subject: hipe_tagscheme: x86 lea+test for mask_and_compare

By changing mask_and_compare from and,sub to sub,and, x86 can use a
3-address LEA immediate add, saving a mov. The RISC backends should see
no change in sequence length.

We make test_(heap_|sub)binary use mask_and_compare so they will benefit
too.
---
 lib/hipe/rtl/hipe_tagscheme.erl | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl
index 1d9861da7a..566a28cd59 100644
--- a/lib/hipe/rtl/hipe_tagscheme.erl
+++ b/lib/hipe/rtl/hipe_tagscheme.erl
@@ -183,8 +183,9 @@ get_header(Res, X) ->
 
 mask_and_compare(X, Mask, Value, TrueLab, FalseLab, Pred) ->
   Tmp = hipe_rtl:mk_new_reg_gcsafe(),
-  [hipe_rtl:mk_alu(Tmp, X, 'and', hipe_rtl:mk_imm(Mask)),
-   hipe_rtl:mk_branch(Tmp, 'eq', hipe_rtl:mk_imm(Value), TrueLab, FalseLab, Pred)].
+  [hipe_rtl:mk_alu(Tmp, X, 'sub', hipe_rtl:mk_imm(Value)),
+   hipe_rtl:mk_branch(Tmp, 'and', hipe_rtl:mk_imm(Mask),
+		      eq, TrueLab, FalseLab, Pred)].
 
 test_immed1(X, Value, TrueLab, FalseLab, Pred) ->
   mask_and_compare(X, ?TAG_IMMED1_MASK, Value, TrueLab, FalseLab, Pred).
@@ -886,12 +887,10 @@ heap_arch_spec(HP) ->
    hipe_rtl_arch:pcb_store(?P_OFF_HEAP_FIRST, HP)].
 
 test_heap_binary(Binary, TrueLblName, FalseLblName) ->
-  Tmp1 = hipe_rtl:mk_new_reg_gcsafe(),
-  Tmp2 = hipe_rtl:mk_new_reg_gcsafe(),
-  [get_header(Tmp1, Binary),
-   hipe_rtl:mk_alu(Tmp2, Tmp1, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)),
-   hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(?TAG_HEADER_HEAP_BIN), 
-		      TrueLblName, FalseLblName)].
+  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
+  [get_header(Tmp, Binary),
+   mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_HEAP_BIN,
+		    TrueLblName, FalseLblName, 0.5)].
 
 mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, Orig) -> 
   mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, 
@@ -919,11 +918,10 @@ build_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs,
    set_field_from_term({sub_binary, orig}, Dst, Orig)].
 
 test_subbinary(Binary, TrueLblName, FalseLblName) ->
-  Tmp1 = hipe_rtl:mk_new_reg_gcsafe(),
-  Tmp2 = hipe_rtl:mk_new_reg_gcsafe(),
-  [get_header(Tmp1, Binary),
-   hipe_rtl:mk_alu(Tmp2, Tmp1, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)),
-   hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(?TAG_HEADER_SUB_BIN), TrueLblName, FalseLblName)].
+  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
+  [get_header(Tmp, Binary),
+   mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_SUB_BIN,
+		    TrueLblName, FalseLblName, 0.5)].
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%
-- 
cgit v1.2.3


From 6ce54e9665788e6a172950c68fb58158e923c6ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Mon, 14 Nov 2016 15:31:28 +0100
Subject: hipe_icode: Always const-propagate if&call args

---
 lib/hipe/icode/hipe_icode.erl                |  4 +++
 lib/hipe/icode/hipe_icode_ssa_const_prop.erl | 43 ++++++++++++++++------------
 2 files changed, 29 insertions(+), 18 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/icode/hipe_icode.erl b/lib/hipe/icode/hipe_icode.erl
index dce2fcb392..d2d08e0253 100644
--- a/lib/hipe/icode/hipe_icode.erl
+++ b/lib/hipe/icode/hipe_icode.erl
@@ -438,6 +438,7 @@
 	 if_true_label/1,
 	 if_false_label/1,
 	 if_args/1,
+	 if_args_update/2,
 	 if_pred/1,
 	 %% is_if/1,
 	 
@@ -714,6 +715,9 @@ if_op_update(IF, NewOp) -> IF#icode_if{op=NewOp}.
 -spec if_args(#icode_if{}) -> [icode_term_arg()].
 if_args(#icode_if{args=Args}) -> Args.
 
+-spec if_args_update(#icode_if{}, [icode_term_arg()]) -> #icode_if{}.
+if_args_update(IF, Args) -> IF#icode_if{args=Args}.
+
 -spec if_true_label(#icode_if{}) -> icode_lbl().
 if_true_label(#icode_if{true_label=TrueLbl}) -> TrueLbl.
 
diff --git a/lib/hipe/icode/hipe_icode_ssa_const_prop.erl b/lib/hipe/icode/hipe_icode_ssa_const_prop.erl
index 4ab4d7e95d..5d3d5413bc 100644
--- a/lib/hipe/icode/hipe_icode_ssa_const_prop.erl
+++ b/lib/hipe/icode/hipe_icode_ssa_const_prop.erl
@@ -97,11 +97,13 @@ visit_expression(Instruction, Environment) ->
       visit_begin_handler     (Instruction, EvaluatedArguments, Environment);
     #icode_begin_try{} ->
       visit_begin_try         (Instruction, EvaluatedArguments, Environment);
-    #icode_fail{} ->                
+    #icode_fail{} ->
       visit_fail              (Instruction, EvaluatedArguments, Environment);
-    _ ->
-      %% label, end_try, comment, return,
-      {[], [], Environment}
+    #icode_comment{} -> {[], [], Environment};
+    #icode_end_try{} -> {[], [], Environment};
+    #icode_enter{} ->   {[], [], Environment};
+    #icode_label{} ->   {[], [], Environment};
+    #icode_return{} ->  {[], [], Environment}
   end.
 
 %%-----------------------------------------------------------------------------
@@ -463,11 +465,15 @@ update_instruction(Instruction, Environment) ->
       update_type(Instruction, Environment);
     #icode_switch_tuple_arity{} ->
       update_switch_tuple_arity(Instruction, Environment);
-    _ ->
-      %% goto, comment, label, return, begin_handler, end_try,
-      %% begin_try, fail
-      %% We could but don't handle: catch?, fail?
-      [Instruction]
+    %% We could but don't handle: catch?, fail?
+    #icode_begin_handler{} -> [Instruction];
+    #icode_begin_try{} ->     [Instruction];
+    #icode_comment{} ->       [Instruction];
+    #icode_end_try{} ->       [Instruction];
+    #icode_fail{} ->          [Instruction];
+    #icode_goto{} ->          [Instruction];
+    #icode_label{} ->         [Instruction];
+    #icode_return{} ->        [Instruction]
   end.
 
 %%-----------------------------------------------------------------------------
@@ -502,14 +508,12 @@ update_call(Instruction, Environment) ->
 			  [Instruction, NewInstructions]),
 	  NewInstructions
       end;
-%%     %% [] ->  %% No destination; we don't touch this
-%%     [] -> 
-%%       NewArguments = update_arguments(hipe_icode:call_args(Instruction),
-%%                                       Environment),
-%%       [hipe_icode:call_args_update(Instruction, NewArguments)];
+    %% [] ->  %% No destination; only the arguments are updated
     %% List-> %% Means register allocation; not implemented at this point
     _ ->
-      [Instruction]
+      NewArguments = update_arguments(hipe_icode:call_args(Instruction),
+                                      Environment),
+      [hipe_icode:call_args_update(Instruction, NewArguments)]
   end.
 
 %%-----------------------------------------------------------------------------
@@ -574,7 +578,9 @@ update_if(Instruction, Environment) ->
       %% Convert the if-test to a type test if possible.
       Op = hipe_icode:if_op(Instruction),
       case Op =:= '=:=' orelse Op =:= '=/=' of
-	false -> [Instruction];
+	false ->
+	  [hipe_icode:if_args_update(
+	     Instruction, update_arguments(Args, Environment))];
 	true ->
 	  [Arg1, Arg2] = Args,
 	  case EvaluatedArguments of
@@ -604,8 +610,9 @@ conv_if_to_type(I, Const, Arg) when is_atom(Const);
   NewI = hipe_icode:mk_type([Arg], Test, T, F),
   ?CONST_PROP_MSG("if: ~w ---> type ~w\n", [I, NewI]),
   [NewI];
-conv_if_to_type(I, _, _) ->
-  [I].
+conv_if_to_type(I, Const, Arg) ->
+  %% Note: we are potentially commuting the (equality) comparison here
+  [hipe_icode:if_args_update(I, [Arg, hipe_icode:mk_const(Const)])].
 
 %%-----------------------------------------------------------------------------
 
-- 
cgit v1.2.3


From 053e54e5a937deb685dc71e3873074cb8efc96ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Mon, 14 Nov 2016 15:42:57 +0100
Subject: hipe_tagscheme: Simplify test_two_fixnums with imm

test_two_fixnums would previously only check its right-hand argument for
immediates, but not its left. Now, test_two_fixnums reduces to
test_fixnum if either argument is an immediate.
---
 lib/hipe/rtl/hipe_tagscheme.erl | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl
index 566a28cd59..5d11b9b82e 100644
--- a/lib/hipe/rtl/hipe_tagscheme.erl
+++ b/lib/hipe/rtl/hipe_tagscheme.erl
@@ -464,12 +464,17 @@ test_fixnums_1([Arg1, Arg2|Args], Acc) ->
 
 test_two_fixnums(Arg1, Arg2, FalseLab) ->
   TrueLab = hipe_rtl:mk_new_label(),
-  case hipe_rtl:is_imm(Arg2) of
+  case hipe_rtl:is_imm(Arg1) orelse hipe_rtl:is_imm(Arg2) of
     true ->
-      Value = hipe_rtl:imm_value(Arg2),
+      {Imm, Var} =
+	case hipe_rtl:is_imm(Arg1) of
+	  true  -> {Arg1, Arg2};
+	  false -> {Arg2, Arg1}
+	end,
+      Value = hipe_rtl:imm_value(Imm),
       case Value band ?TAG_IMMED1_MASK of
 	?TAG_IMMED1_SMALL ->
-	  [test_fixnum(Arg1, hipe_rtl:label_name(TrueLab), FalseLab, 0.99),
+	  [test_fixnum(Var, hipe_rtl:label_name(TrueLab), FalseLab, 0.99),
 	   TrueLab];
 	_ ->
 	  [hipe_rtl:mk_goto(FalseLab)]
-- 
cgit v1.2.3


From fda3c9575d77bed0250f76f17e92d18836e15d0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Wed, 16 Nov 2016 18:29:16 +0100
Subject: hipe_x86: Fix encoding of test instr w/ neg imm

Also, use byte form for immediates up to 255, since there's no sign
extension in byte form.

HiPE seems to never generate negative test immediates currently, but we
should at least not output incorrect encodings.
---
 lib/hipe/amd64/hipe_amd64_encode.erl |  6 ++++++
 lib/hipe/x86/hipe_x86_assemble.erl   | 37 +++++++++++++++++++++---------------
 2 files changed, 28 insertions(+), 15 deletions(-)

(limited to 'lib')

diff --git a/lib/hipe/amd64/hipe_amd64_encode.erl b/lib/hipe/amd64/hipe_amd64_encode.erl
index c41eaa3c6a..16bd705055 100644
--- a/lib/hipe/amd64/hipe_amd64_encode.erl
+++ b/lib/hipe/amd64/hipe_amd64_encode.erl
@@ -828,12 +828,16 @@ test_encode(Opnds) ->
 	    [?PFX_OPND_16BITS, 16#A9 | le16(Imm16, [])];
 	{eax, {imm32,Imm32}} ->
 	    [16#A9 | le32(Imm32, [])];
+	{rax, {imm32,Imm32}} ->
+	    [rex([{w,1}]), 16#A9 | le32(Imm32, [])];
 	{{rm8,RM8}, {imm8,Imm8}} ->
 	    [rex([{r8,RM8}]), 16#F6 | encode_rm(RM8, 2#000, [Imm8])];
 	{{rm16,RM16}, {imm16,Imm16}} ->
 	    [?PFX_OPND_16BITS, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))];
 	{{rm32,RM32}, {imm32,Imm32}} ->
 	    [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))];
+	{{rm64,RM64}, {imm32,Imm32}} ->
+	    [rex([{w,1}]), 16#F7 | encode_rm(RM64, 2#000, le32(Imm32, []))];
 	{{rm32,RM32}, {reg32,Reg32}} ->
 	    [16#85 | encode_rm(RM32, Reg32, [])];
 	{{rm64,RM64}, {reg64,Reg64}} ->
@@ -1478,10 +1482,12 @@ dotest1(OS) ->
     t(OS,'test',{al,Imm8}),
     t(OS,'test',{ax,Imm16}),
     t(OS,'test',{eax,Imm32}),
+    t(OS,'test',{rax,Imm32}),
     t(OS,'test',{RM8,Imm8}),
     t(OS,'test',{RM8REX,Imm8}),
     t(OS,'test',{RM16,Imm16}),
     t(OS,'test',{RM32,Imm32}),
+    t(OS,'test',{RM64,Imm32}),
     t(OS,'test',{RM32,Reg32}),
     t(OS,'test',{RM64,Reg64}),
     t(OS,'xor',{eax,Imm32}),
diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl
index 4986933f50..e692ff0ebb 100644
--- a/lib/hipe/x86/hipe_x86_assemble.erl
+++ b/lib/hipe/x86/hipe_x86_assemble.erl
@@ -888,22 +888,29 @@ resolve_alu_args(Src, Dst, Context) ->
 %%% test
 resolve_test_args(Src, Dst, Context) ->
   case Src of
-    #x86_imm{} ->
-      Imm = translate_imm(Src, Context, false),
-      case Imm of
-	{imm8,_} ->
-	  case Dst of
-	    #x86_temp{reg=0} -> {al, Imm};
-	    #x86_temp{} -> resolve_test_imm8_reg(Imm, Dst);
-	    #x86_mem{} -> {mem_to_rm8(Dst), Imm}
-	  end;
-	{imm32,_} ->
-	  {case Dst of
-	     #x86_temp{reg=0} -> eax;
-	     #x86_temp{} -> temp_to_rm32(Dst);
-	     #x86_mem{} -> mem_to_rm32(Dst)
-	   end, Imm}
+    %% Since we're using an 8-bit instruction, the immediate is not sign
+    %% extended. Thus, we can use immediates up to 255.
+    #x86_imm{value=ImmVal}
+      when is_integer(ImmVal), ImmVal >= 0, ImmVal =< 255 ->
+      Imm = {imm8, ImmVal},
+      case Dst of
+	#x86_temp{reg=0} -> {al, Imm};
+	#x86_temp{} -> resolve_test_imm8_reg(Imm, Dst);
+	#x86_mem{} -> {mem_to_rm8(Dst), Imm}
       end;
+    #x86_imm{value=ImmVal} when is_integer(ImmVal), ImmVal >= 0 ->
+      {case Dst of
+	 #x86_temp{reg=0} -> eax;
+	 #x86_temp{} -> temp_to_rm32(Dst);
+	 #x86_mem{} -> mem_to_rm32(Dst)
+       end, {imm32, ImmVal}};
+    #x86_imm{} -> % Negative ImmVal; use word-sized instr, imm32
+      {_, ImmVal} = translate_imm(Src, Context, false),
+      {case Dst of
+	 #x86_temp{reg=0} -> ?EAX;
+	 #x86_temp{} -> temp_to_rmArch(Dst);
+	 #x86_mem{} -> mem_to_rmArch(Dst)
+       end, {imm32, ImmVal}};
     #x86_temp{} ->
       NewDst =
 	case Dst of
-- 
cgit v1.2.3