aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--bootstrap/lib/kernel/ebin/hipe_unified_loader.beambin12492 -> 12500 bytes
-rw-r--r--erts/configure.in17
-rw-r--r--erts/emulator/hipe/hipe_amd64.c117
-rw-r--r--erts/emulator/hipe/hipe_bif0.c6
-rw-r--r--lib/compiler/src/beam_type.erl8
-rw-r--r--lib/compiler/test/beam_type_SUITE.erl33
-rw-r--r--lib/hipe/x86/hipe_rtl_to_x86.erl24
-rw-r--r--lib/hipe/x86/hipe_x86_assemble.erl1
-rw-r--r--lib/kernel/src/hipe_unified_loader.erl1
9 files changed, 154 insertions, 53 deletions
diff --git a/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam b/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam
index 8ca0b915a2..3f5e59ec50 100644
--- a/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam
+++ b/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam
Binary files differ
diff --git a/erts/configure.in b/erts/configure.in
index f15bb56435..dd6c52b079 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -2749,23 +2749,6 @@ if test "$cross_compiling" != "yes" && test X${enable_hipe} != Xno; then
fi
fi
-dnl Check to disable -fPIE and friends for HiPE on amd64
-if test X${enable_hipe} = Xyes && test X$ARCH = Xamd64; then
- AC_TRY_COMPILE(, [#if defined(__pie__) || defined(__PIE__)
- #error -fPIE is enabled by default
- #endif],
- [AC_MSG_NOTICE([No -fPIE enabled by default])],
- [AC_MSG_WARN([Security feature -fPIE will be disabled for HiPE])
- STATIC_CFLAGS="-fno-PIE $STATIC_CFLAGS"
- saved_LDFLAGS=$LDFLAGS
- LDFLAGS="-no-pie $LDFLAGS"
- AC_TRY_LINK(,, [],
- [LDFLAGS="-fno-PIE $saved_LDFLAGS"
- AC_TRY_LINK(,, [],
- [AC_MSG_WARN([Linked does not accept option -no-pie nor -fno-PIE])
- LDFLAGS=$saved_LDFLAGS])])])
-fi
-
if test X${enable_hipe} = Xyes; then
case $OPSYS in
linux)
diff --git a/erts/emulator/hipe/hipe_amd64.c b/erts/emulator/hipe/hipe_amd64.c
index e3cff4a4ba..f23f341e6d 100644
--- a/erts/emulator/hipe/hipe_amd64.c
+++ b/erts/emulator/hipe/hipe_amd64.c
@@ -28,6 +28,7 @@
#include "error.h"
#include "bif.h"
#include "big.h" /* term_to_Sint() */
+#include "erl_binary.h"
#include "hipe_arch.h"
#include "hipe_bif0.h"
@@ -38,6 +39,8 @@
#undef ERL_FUN_SIZE
#include "hipe_literals.h"
+static void patch_trampoline(void *trampoline, void *destAddress);
+
const Uint sse2_fnegate_mask[2] = {0x8000000000000000,0};
void hipe_patch_load_fe(Uint64 *address, Uint64 value)
@@ -52,9 +55,9 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type)
switch (type) {
case am_closure:
case am_constant:
+ case am_c_const:
*(Uint64*)address = value;
break;
- case am_c_const:
case am_atom:
/* check that value fits in an unsigned imm32 */
/* XXX: are we sure it's not really a signed imm32? */
@@ -71,14 +74,18 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type)
int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
{
- Sint rel32;
+ Sint64 destOffset = (Sint64)destAddress - (Sint64)callAddress - 4;
- ASSERT(trampoline == NULL);
+ if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L)) {
+ destOffset = (Sint64)trampoline - (Sint64)callAddress - 4;
- rel32 = (Sint)destAddress - (Sint)callAddress - 4;
- if ((Sint)(Sint32)rel32 != rel32)
- return -1;
- *(Uint32*)callAddress = (Uint32)rel32;
+ if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L))
+ return -1;
+
+ patch_trampoline(trampoline, destAddress);
+ }
+
+ *(Uint32*)callAddress = (Uint32)destOffset;
hipe_flush_icache_word(callAddress);
return 0;
}
@@ -96,12 +103,80 @@ static void *alloc_code(unsigned int alloc_bytes)
return erts_alloc(ERTS_ALC_T_HIPE_EXEC, alloc_bytes);
}
+static int check_callees(Eterm callees)
+{
+ Eterm *tuple;
+ Uint arity;
+ Uint i;
+
+ if (is_not_tuple(callees))
+ return -1;
+ tuple = tuple_val(callees);
+ arity = arityval(tuple[0]);
+ for (i = 1; i <= arity; ++i) {
+ Eterm mfa = tuple[i];
+ if (is_atom(mfa))
+ continue;
+ if (is_not_tuple(mfa) ||
+ tuple_val(mfa)[0] != make_arityval(3) ||
+ is_not_atom(tuple_val(mfa)[1]) ||
+ is_not_atom(tuple_val(mfa)[2]) ||
+ is_not_small(tuple_val(mfa)[3]) ||
+ unsigned_val(tuple_val(mfa)[3]) > 255)
+ return -1;
+ }
+ return arity;
+}
+
+#define TRAMPOLINE_BYTES 12
+
+static void generate_trampolines(unsigned char *address,
+ int nrcallees, Eterm callees,
+ unsigned char **trampvec)
+{
+ unsigned char *trampoline = address;
+ int i;
+
+ for(i = 0; i < nrcallees; ++i) {
+ trampoline[0] = 0x48; /* movabsq $..., %rax; */
+ trampoline[1] = 0xb8;
+ *(void**)(trampoline+2) = NULL; /* callee's address */
+ trampoline[10] = 0xff; /* jmpq *%rax */
+ trampoline[11] = 0xe0;
+ trampvec[i] = trampoline;
+ trampoline += TRAMPOLINE_BYTES;
+ }
+ hipe_flush_icache_range(address, nrcallees*TRAMPOLINE_BYTES);
+}
+
+static void patch_trampoline(void *trampoline, void *destAddress)
+{
+ unsigned char *tp = (unsigned char*) trampoline;
+
+ ASSERT(tp[0] == 0x48 && tp[1] == 0xb8);
+
+ *(void**)(tp+2) = destAddress; /* callee's address */
+ hipe_flush_icache_word(tp+2);
+}
+
void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
{
- if (is_not_nil(callees))
+ int nrcallees;
+ Eterm trampvecbin;
+ unsigned char **trampvec;
+ unsigned char *address;
+
+ nrcallees = check_callees(callees);
+ if (nrcallees < 0)
return NULL;
- *trampolines = NIL;
- return alloc_code(nrbytes);
+
+ trampvecbin = new_binary(p, NULL, nrcallees*sizeof(unsigned char*));
+ trampvec = (unsigned char **)binary_bytes(trampvecbin);
+
+ address = alloc_code(nrbytes + nrcallees*TRAMPOLINE_BYTES);
+ generate_trampolines(address + nrbytes, nrcallees, callees, trampvec);
+ *trampolines = trampvecbin;
+ return address;
}
void hipe_free_code(void* code, unsigned int bytes)
@@ -129,10 +204,9 @@ void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity)
*/
unsigned int codeSize;
unsigned char *code, *codep;
- unsigned int callEmuOffset;
- codeSize = /* 23, 26, 29, or 32 bytes */
- 23 + /* 23 when all offsets are 8-bit */
+ codeSize = /* 30, 33, 36, or 39 bytes */
+ 30 + /* 30 when all offsets are 8-bit */
(P_CALLEE_EXP >= 128 ? 3 : 0) +
((P_CALLEE_EXP + 4) >= 128 ? 3 : 0) +
(P_ARITY >= 128 ? 3 : 0);
@@ -197,14 +271,15 @@ void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity)
codep[0] = beamArity;
codep += 1;
- /* jmp callemu; 5 bytes */
- callEmuOffset = (unsigned char*)nbif_callemu - (code + codeSize);
- codep[0] = 0xe9;
- codep[1] = callEmuOffset & 0xFF;
- codep[2] = (callEmuOffset >> 8) & 0xFF;
- codep[3] = (callEmuOffset >> 16) & 0xFF;
- codep[4] = (callEmuOffset >> 24) & 0xFF;
- codep += 5;
+ /* jmp callemu; 12 bytes */
+ codep[0] = 0x48;
+ codep[1] = 0xb8;
+ codep += 2;
+ *(Uint64*)codep = (Uint64)nbif_callemu;
+ codep += 8;
+ codep[0] = 0xff;
+ codep[1] = 0xe0;
+ codep += 2;
ASSERT(codep == code + codeSize);
diff --git a/erts/emulator/hipe/hipe_bif0.c b/erts/emulator/hipe/hipe_bif0.c
index e477c4cdea..a8be64e08d 100644
--- a/erts/emulator/hipe/hipe_bif0.c
+++ b/erts/emulator/hipe/hipe_bif0.c
@@ -1112,7 +1112,7 @@ static struct hipe_mfa_info* mod2mfa_put(struct hipe_mfa_info* mfa)
struct hipe_ref {
struct hipe_ref_head head; /* list of refs to same calleee */
void *address;
-#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
+#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
void *trampoline;
#endif
unsigned int flags;
@@ -1543,7 +1543,7 @@ BIF_RETTYPE hipe_bifs_add_ref_2(BIF_ALIST_2)
ref = erts_alloc(ERTS_ALC_T_HIPE_LL, sizeof(struct hipe_ref));
ref->address = address;
-#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
+#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
ref->trampoline = trampoline;
#endif
ref->flags = flags;
@@ -1819,7 +1819,7 @@ void hipe_redirect_to_module(Module* modp)
if (ref->flags & REF_FLAG_IS_LOAD_MFA)
res = hipe_patch_insn(ref->address, (Uint)p->remote_address, am_load_mfa);
else {
-#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
+#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
void* trampoline = ref->trampoline;
#else
void* trampoline = NULL;
diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl
index b2fabed2c5..a2e3af37d0 100644
--- a/lib/compiler/src/beam_type.erl
+++ b/lib/compiler/src/beam_type.erl
@@ -943,10 +943,10 @@ merge_type_info({tuple,SzKind1,Sz1,[]}, {tuple,_SzKind2,_Sz2,First}=Tuple2) ->
merge_type_info({tuple,SzKind1,Sz1,First}, Tuple2);
merge_type_info({tuple,_SzKind1,_Sz1,First}=Tuple1, {tuple,SzKind2,Sz2,_}) ->
merge_type_info(Tuple1, {tuple,SzKind2,Sz2,First});
-merge_type_info(integer, {integer,_}=Int) ->
- Int;
-merge_type_info({integer,_}=Int, integer) ->
- Int;
+merge_type_info(integer, {integer,_}) ->
+ integer;
+merge_type_info({integer,_}, integer) ->
+ integer;
merge_type_info({integer,{Min1,Max1}}, {integer,{Min2,Max2}}) ->
{integer,{max(Min1, Min2),min(Max1, Max2)}};
merge_type_info({binary,U1}, {binary,U2}) ->
diff --git a/lib/compiler/test/beam_type_SUITE.erl b/lib/compiler/test/beam_type_SUITE.erl
index dfbf2aa4a0..e33df809ff 100644
--- a/lib/compiler/test/beam_type_SUITE.erl
+++ b/lib/compiler/test/beam_type_SUITE.erl
@@ -67,6 +67,15 @@ integers(_Config) ->
college = do_integers_3(),
+ zero = do_integers_4(<<0:1>>, 0),
+ one = do_integers_4(<<1:1>>, 0),
+ other = do_integers_4(<<1:1>>, 2),
+
+ zero = do_integers_5(0, 0),
+ one = do_integers_5(0, 1),
+ two = do_integers_5(0, 2),
+ three = do_integers_5(0, 3),
+
ok.
do_integers_1(B0) ->
@@ -89,6 +98,30 @@ do_integers_3() ->
1 -> 0
end.
+do_integers_4(<<X:1,T/bits>>, C) ->
+ %% Binary matching gives the range 0-1 for X.
+ %% The range for `X bor C` is unknown. It must not be inherited
+ %% from X. (`X bor C` will reuse the register used for X.)
+ case X bor C of
+ 0 -> do_integers_4(T, C, zero);
+ 1 -> do_integers_4(T, C, one);
+ _ -> do_integers_4(T, C, other)
+ end.
+
+do_integers_4(_, _, Res) ->
+ Res.
+
+do_integers_5(X0, Y0) ->
+ %% X and Y will use the same register.
+ X = X0 band 1,
+ Y = Y0 band 3,
+ case Y of
+ 0 -> zero;
+ 1 -> one;
+ 2 -> two;
+ 3 -> three
+ end.
+
coverage(_Config) ->
{'EXIT',{badarith,_}} = (catch id(1) bsl 0.5),
{'EXIT',{badarith,_}} = (catch id(2.0) bsl 2),
diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl
index 31e4f6e4ac..22947da148 100644
--- a/lib/hipe/x86/hipe_rtl_to_x86.erl
+++ b/lib/hipe/x86/hipe_rtl_to_x86.erl
@@ -646,7 +646,7 @@ conv_imm(Opnd, Map) ->
is_imm64(Value) when is_integer(Value) ->
(Value < -(1 bsl (32 - 1))) or (Value > (1 bsl (32 - 1)) - 1);
is_imm64({_,atom}) -> false; % Atoms are 32 bits.
-is_imm64({_,c_const}) -> false; % c_consts are 32 bits.
+is_imm64({_,c_const}) -> true; % c_consts are 64 bits.
is_imm64({_,_}) -> true . % Other relocs are 64 bits.
-else.
conv_imm(Opnd, Map) ->
@@ -777,6 +777,18 @@ conv_fconv(Dst, Src) ->
%%% Finalise the conversion of a 2-address FP operation.
+-ifdef(HIPE_AMD64).
+conv_fp_unary(Dst, Src, 'fchs') ->
+ Tmp = new_untagged_temp(),
+ case same_opnd(Dst, Src) of
+ true ->
+ [];
+ _ ->
+ [hipe_x86:mk_fmove(Src, Dst)]
+ end ++
+ mk_load_address(c_const, hipe_x86:mk_imm({sse2_fnegate_mask, c_const}), Tmp) ++
+ [hipe_x86:mk_fp_binop('xorpd', hipe_x86:mk_mem(Tmp, hipe_x86:mk_imm(0), double), Dst)].
+-else.
conv_fp_unary(Dst, Src, FpUnOp) ->
case same_opnd(Dst, Src) of
true ->
@@ -785,6 +797,7 @@ conv_fp_unary(Dst, Src, FpUnOp) ->
[hipe_x86:mk_fmove(Src, Dst),
hipe_x86:mk_fp_unop(FpUnOp, Dst)]
end.
+-endif.
conv_fp_unop(RtlFpUnOp) ->
case RtlFpUnOp of
@@ -854,13 +867,8 @@ mk_jmp_switch(Index, JTabLab, Labels) ->
%%% Finalise the translation of a load_address instruction.
-ifdef(HIPE_AMD64).
-mk_load_address(Type, Src, Dst) ->
- case Type of
- c_const -> % 32 bits
- [hipe_x86:mk_move(Src, Dst)];
- _ ->
- [hipe_x86:mk_move64(Src, Dst)]
- end.
+mk_load_address(_Type, Src, Dst) ->
+ [hipe_x86:mk_move64(Src, Dst)].
-else.
mk_load_address(_Type, Src, Dst) ->
[hipe_x86:mk_move(Src, Dst)].
diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl
index 50919bdf4e..9d2586a14d 100644
--- a/lib/hipe/x86/hipe_x86_assemble.erl
+++ b/lib/hipe/x86/hipe_x86_assemble.erl
@@ -735,6 +735,7 @@ resolve_sse2_op(Op) ->
fdiv -> divsd;
fmul -> mulsd;
fsub -> subsd;
+ xorpd -> xorpd;
_ -> exit({?MODULE, unknown_sse2_operator, Op})
end.
diff --git a/lib/kernel/src/hipe_unified_loader.erl b/lib/kernel/src/hipe_unified_loader.erl
index f8199fcf71..fd06f0f7d8 100644
--- a/lib/kernel/src/hipe_unified_loader.erl
+++ b/lib/kernel/src/hipe_unified_loader.erl
@@ -275,6 +275,7 @@ needs_trampolines(Architecture) ->
arm -> true;
powerpc -> true;
ppc64 -> true;
+ amd64 -> true;
_ -> false
end.