aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbhuztez <[email protected]>2017-12-31 14:07:49 +0800
committerSverker Eriksson <[email protected]>2018-01-29 16:39:21 +0100
commit38a99af36f044459db40b76be2cc72c638eb6d98 (patch)
treee8c656436ed0d934aab93daa5c3bc8ef33a05cd4
parentb73e87f5de895860337c0d30473edd1e59667baa (diff)
downloadotp-38a99af36f044459db40b76be2cc72c638eb6d98.tar.gz
otp-38a99af36f044459db40b76be2cc72c638eb6d98.tar.bz2
otp-38a99af36f044459db40b76be2cc72c638eb6d98.zip
make HiPE work on x86_64 when PIE is enabled
Currently HiPE amd64 assumes the runtime system code is loaded into the low 2G of the address space. However, this is not the case when PIE is enabled, it is loaded into a random location. So trampolines are required to call BIFs, and also we have first to load the address of sse2_fnegate_mask to a regisiter before xorpd in fchs.
-rw-r--r--erts/configure.in17
-rw-r--r--erts/emulator/hipe/hipe_amd64.c106
-rw-r--r--erts/emulator/hipe/hipe_bif0.c6
-rw-r--r--lib/hipe/x86/hipe_rtl_to_x86.erl24
-rw-r--r--lib/hipe/x86/hipe_x86_assemble.erl1
-rw-r--r--lib/kernel/src/hipe_unified_loader.erl1
6 files changed, 106 insertions, 49 deletions
diff --git a/erts/configure.in b/erts/configure.in
index f15bb56435..dd6c52b079 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -2749,23 +2749,6 @@ if test "$cross_compiling" != "yes" && test X${enable_hipe} != Xno; then
fi
fi
-dnl Check to disable -fPIE and friends for HiPE on amd64
-if test X${enable_hipe} = Xyes && test X$ARCH = Xamd64; then
- AC_TRY_COMPILE(, [#if defined(__pie__) || defined(__PIE__)
- #error -fPIE is enabled by default
- #endif],
- [AC_MSG_NOTICE([No -fPIE enabled by default])],
- [AC_MSG_WARN([Security feature -fPIE will be disabled for HiPE])
- STATIC_CFLAGS="-fno-PIE $STATIC_CFLAGS"
- saved_LDFLAGS=$LDFLAGS
- LDFLAGS="-no-pie $LDFLAGS"
- AC_TRY_LINK(,, [],
- [LDFLAGS="-fno-PIE $saved_LDFLAGS"
- AC_TRY_LINK(,, [],
- [AC_MSG_WARN([Linked does not accept option -no-pie nor -fno-PIE])
- LDFLAGS=$saved_LDFLAGS])])])
-fi
-
if test X${enable_hipe} = Xyes; then
case $OPSYS in
linux)
diff --git a/erts/emulator/hipe/hipe_amd64.c b/erts/emulator/hipe/hipe_amd64.c
index e3cff4a4ba..bdeae23c7c 100644
--- a/erts/emulator/hipe/hipe_amd64.c
+++ b/erts/emulator/hipe/hipe_amd64.c
@@ -28,6 +28,7 @@
#include "error.h"
#include "bif.h"
#include "big.h" /* term_to_Sint() */
+#include "erl_binary.h"
#include "hipe_arch.h"
#include "hipe_bif0.h"
@@ -52,9 +53,9 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type)
switch (type) {
case am_closure:
case am_constant:
+ case am_c_const:
*(Uint64*)address = value;
break;
- case am_c_const:
case am_atom:
/* check that value fits in an unsigned imm32 */
/* XXX: are we sure it's not really a signed imm32? */
@@ -71,14 +72,19 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type)
int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
{
- Sint rel32;
+ Sint64 destOffset = (Sint64)destAddress - (Sint64)callAddress - 4;
- ASSERT(trampoline == NULL);
+ if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L)) {
+ destOffset = ((Sint64)trampoline - 2) - (Sint64)callAddress - 4;
- rel32 = (Sint)destAddress - (Sint)callAddress - 4;
- if ((Sint)(Sint32)rel32 != rel32)
- return -1;
- *(Uint32*)callAddress = (Uint32)rel32;
+ if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L))
+ return -1;
+
+ *(void**)trampoline = destAddress;
+ hipe_flush_icache_word(trampoline);
+ }
+
+ *(Uint32*)callAddress = (Uint32)destOffset;
hipe_flush_icache_word(callAddress);
return 0;
}
@@ -96,12 +102,70 @@ static void *alloc_code(unsigned int alloc_bytes)
return erts_alloc(ERTS_ALC_T_HIPE_EXEC, alloc_bytes);
}
+static int check_callees(Eterm callees)
+{
+ Eterm *tuple;
+ Uint arity;
+ Uint i;
+
+ if (is_not_tuple(callees))
+ return -1;
+ tuple = tuple_val(callees);
+ arity = arityval(tuple[0]);
+ for (i = 1; i <= arity; ++i) {
+ Eterm mfa = tuple[i];
+ if (is_atom(mfa))
+ continue;
+ if (is_not_tuple(mfa) ||
+ tuple_val(mfa)[0] != make_arityval(3) ||
+ is_not_atom(tuple_val(mfa)[1]) ||
+ is_not_atom(tuple_val(mfa)[2]) ||
+ is_not_small(tuple_val(mfa)[3]) ||
+ unsigned_val(tuple_val(mfa)[3]) > 255)
+ return -1;
+ }
+ return arity;
+}
+
+#define TRAMPOLINE_BYTES 12
+
+static void generate_trampolines(unsigned char *address,
+ int nrcallees, Eterm callees,
+ unsigned char **trampvec)
+{
+ unsigned char *trampoline = address;
+ int i;
+
+ for(i = 0; i < nrcallees; ++i) {
+ trampoline[0] = 0x48; /* movabsq $..., %rax; */
+ trampoline[1] = 0xb8;
+ *((Uint64*)(trampoline+2)) = 0; /* callee's address */
+ trampoline[10] = 0xff; /* jmpq *%rax */
+ trampoline[11] = 0xe0;
+ trampvec[i] = trampoline+2;
+ trampoline += TRAMPOLINE_BYTES;
+ }
+ hipe_flush_icache_range(address, nrcallees*TRAMPOLINE_BYTES);
+}
+
void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
{
- if (is_not_nil(callees))
+ int nrcallees;
+ Eterm trampvecbin;
+ unsigned char **trampvec;
+ unsigned char *address;
+
+ nrcallees = check_callees(callees);
+ if (nrcallees < 0)
return NULL;
- *trampolines = NIL;
- return alloc_code(nrbytes);
+
+ trampvecbin = new_binary(p, NULL, nrcallees*sizeof(unsigned char*));
+ trampvec = (unsigned char **)binary_bytes(trampvecbin);
+
+ address = alloc_code(nrbytes + nrcallees*TRAMPOLINE_BYTES);
+ generate_trampolines(address + nrbytes, nrcallees, callees, trampvec);
+ *trampolines = trampvecbin;
+ return address;
}
void hipe_free_code(void* code, unsigned int bytes)
@@ -129,10 +193,9 @@ void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity)
*/
unsigned int codeSize;
unsigned char *code, *codep;
- unsigned int callEmuOffset;
- codeSize = /* 23, 26, 29, or 32 bytes */
- 23 + /* 23 when all offsets are 8-bit */
+ codeSize = /* 30, 33, 36, or 39 bytes */
+ 30 + /* 30 when all offsets are 8-bit */
(P_CALLEE_EXP >= 128 ? 3 : 0) +
((P_CALLEE_EXP + 4) >= 128 ? 3 : 0) +
(P_ARITY >= 128 ? 3 : 0);
@@ -197,14 +260,15 @@ void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity)
codep[0] = beamArity;
codep += 1;
- /* jmp callemu; 5 bytes */
- callEmuOffset = (unsigned char*)nbif_callemu - (code + codeSize);
- codep[0] = 0xe9;
- codep[1] = callEmuOffset & 0xFF;
- codep[2] = (callEmuOffset >> 8) & 0xFF;
- codep[3] = (callEmuOffset >> 16) & 0xFF;
- codep[4] = (callEmuOffset >> 24) & 0xFF;
- codep += 5;
+ /* jmp callemu; 12 bytes */
+ codep[0] = 0x48;
+ codep[1] = 0xb8;
+ codep += 2;
+ *(Uint64*)codep = (Uint64)nbif_callemu;
+ codep += 8;
+ codep[0] = 0xff;
+ codep[1] = 0xe0;
+ codep += 2;
ASSERT(codep == code + codeSize);
diff --git a/erts/emulator/hipe/hipe_bif0.c b/erts/emulator/hipe/hipe_bif0.c
index e477c4cdea..a8be64e08d 100644
--- a/erts/emulator/hipe/hipe_bif0.c
+++ b/erts/emulator/hipe/hipe_bif0.c
@@ -1112,7 +1112,7 @@ static struct hipe_mfa_info* mod2mfa_put(struct hipe_mfa_info* mfa)
struct hipe_ref {
struct hipe_ref_head head; /* list of refs to same calleee */
void *address;
-#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
+#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
void *trampoline;
#endif
unsigned int flags;
@@ -1543,7 +1543,7 @@ BIF_RETTYPE hipe_bifs_add_ref_2(BIF_ALIST_2)
ref = erts_alloc(ERTS_ALC_T_HIPE_LL, sizeof(struct hipe_ref));
ref->address = address;
-#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
+#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
ref->trampoline = trampoline;
#endif
ref->flags = flags;
@@ -1819,7 +1819,7 @@ void hipe_redirect_to_module(Module* modp)
if (ref->flags & REF_FLAG_IS_LOAD_MFA)
res = hipe_patch_insn(ref->address, (Uint)p->remote_address, am_load_mfa);
else {
-#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
+#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
void* trampoline = ref->trampoline;
#else
void* trampoline = NULL;
diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl
index 31e4f6e4ac..22947da148 100644
--- a/lib/hipe/x86/hipe_rtl_to_x86.erl
+++ b/lib/hipe/x86/hipe_rtl_to_x86.erl
@@ -646,7 +646,7 @@ conv_imm(Opnd, Map) ->
is_imm64(Value) when is_integer(Value) ->
(Value < -(1 bsl (32 - 1))) or (Value > (1 bsl (32 - 1)) - 1);
is_imm64({_,atom}) -> false; % Atoms are 32 bits.
-is_imm64({_,c_const}) -> false; % c_consts are 32 bits.
+is_imm64({_,c_const}) -> true; % c_consts are 64 bits.
is_imm64({_,_}) -> true . % Other relocs are 64 bits.
-else.
conv_imm(Opnd, Map) ->
@@ -777,6 +777,18 @@ conv_fconv(Dst, Src) ->
%%% Finalise the conversion of a 2-address FP operation.
+-ifdef(HIPE_AMD64).
+conv_fp_unary(Dst, Src, 'fchs') ->
+ Tmp = new_untagged_temp(),
+ case same_opnd(Dst, Src) of
+ true ->
+ [];
+ _ ->
+ [hipe_x86:mk_fmove(Src, Dst)]
+ end ++
+ mk_load_address(c_const, hipe_x86:mk_imm({sse2_fnegate_mask, c_const}), Tmp) ++
+ [hipe_x86:mk_fp_binop('xorpd', hipe_x86:mk_mem(Tmp, hipe_x86:mk_imm(0), double), Dst)].
+-else.
conv_fp_unary(Dst, Src, FpUnOp) ->
case same_opnd(Dst, Src) of
true ->
@@ -785,6 +797,7 @@ conv_fp_unary(Dst, Src, FpUnOp) ->
[hipe_x86:mk_fmove(Src, Dst),
hipe_x86:mk_fp_unop(FpUnOp, Dst)]
end.
+-endif.
conv_fp_unop(RtlFpUnOp) ->
case RtlFpUnOp of
@@ -854,13 +867,8 @@ mk_jmp_switch(Index, JTabLab, Labels) ->
%%% Finalise the translation of a load_address instruction.
-ifdef(HIPE_AMD64).
-mk_load_address(Type, Src, Dst) ->
- case Type of
- c_const -> % 32 bits
- [hipe_x86:mk_move(Src, Dst)];
- _ ->
- [hipe_x86:mk_move64(Src, Dst)]
- end.
+mk_load_address(_Type, Src, Dst) ->
+ [hipe_x86:mk_move64(Src, Dst)].
-else.
mk_load_address(_Type, Src, Dst) ->
[hipe_x86:mk_move(Src, Dst)].
diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl
index 50919bdf4e..9d2586a14d 100644
--- a/lib/hipe/x86/hipe_x86_assemble.erl
+++ b/lib/hipe/x86/hipe_x86_assemble.erl
@@ -735,6 +735,7 @@ resolve_sse2_op(Op) ->
fdiv -> divsd;
fmul -> mulsd;
fsub -> subsd;
+ xorpd -> xorpd;
_ -> exit({?MODULE, unknown_sse2_operator, Op})
end.
diff --git a/lib/kernel/src/hipe_unified_loader.erl b/lib/kernel/src/hipe_unified_loader.erl
index f8199fcf71..fd06f0f7d8 100644
--- a/lib/kernel/src/hipe_unified_loader.erl
+++ b/lib/kernel/src/hipe_unified_loader.erl
@@ -275,6 +275,7 @@ needs_trampolines(Architecture) ->
arm -> true;
powerpc -> true;
ppc64 -> true;
+ amd64 -> true;
_ -> false
end.