From 38a99af36f044459db40b76be2cc72c638eb6d98 Mon Sep 17 00:00:00 2001 From: bhuztez Date: Sun, 31 Dec 2017 14:07:49 +0800 Subject: make HiPE work on x86_64 when PIE is enabled Currently HiPE amd64 assumes the runtime system code is loaded into the low 2G of the address space. However, this is not the case when PIE is enabled: the runtime system is then loaded at a random location. Trampolines are therefore required to call BIFs, and the address of sse2_fnegate_mask must first be loaded into a register before the xorpd in fchs. --- erts/configure.in | 17 ------ erts/emulator/hipe/hipe_amd64.c | 106 ++++++++++++++++++++++++++------- erts/emulator/hipe/hipe_bif0.c | 6 +- lib/hipe/x86/hipe_rtl_to_x86.erl | 24 +++++--- lib/hipe/x86/hipe_x86_assemble.erl | 1 + lib/kernel/src/hipe_unified_loader.erl | 1 + 6 files changed, 106 insertions(+), 49 deletions(-) diff --git a/erts/configure.in b/erts/configure.in index f15bb56435..dd6c52b079 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -2749,23 +2749,6 @@ if test "$cross_compiling" != "yes" && test X${enable_hipe} != Xno; then fi fi -dnl Check to disable -fPIE and friends for HiPE on amd64 -if test X${enable_hipe} = Xyes && test X$ARCH = Xamd64; then - AC_TRY_COMPILE(, [#if defined(__pie__) || defined(__PIE__) - #error -fPIE is enabled by default - #endif], - [AC_MSG_NOTICE([No -fPIE enabled by default])], - [AC_MSG_WARN([Security feature -fPIE will be disabled for HiPE]) - STATIC_CFLAGS="-fno-PIE $STATIC_CFLAGS" - saved_LDFLAGS=$LDFLAGS - LDFLAGS="-no-pie $LDFLAGS" - AC_TRY_LINK(,, [], - [LDFLAGS="-fno-PIE $saved_LDFLAGS" - AC_TRY_LINK(,, [], - [AC_MSG_WARN([Linked does not accept option -no-pie nor -fno-PIE]) - LDFLAGS=$saved_LDFLAGS])])]) -fi - if test X${enable_hipe} = Xyes; then case $OPSYS in linux) diff --git a/erts/emulator/hipe/hipe_amd64.c b/erts/emulator/hipe/hipe_amd64.c index e3cff4a4ba..bdeae23c7c 100644 --- a/erts/emulator/hipe/hipe_amd64.c +++ b/erts/emulator/hipe/hipe_amd64.c @@ -28,6 +28,7 @@ #include "error.h" #include 
"bif.h" #include "big.h" /* term_to_Sint() */ +#include "erl_binary.h" #include "hipe_arch.h" #include "hipe_bif0.h" @@ -52,9 +53,9 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type) switch (type) { case am_closure: case am_constant: + case am_c_const: *(Uint64*)address = value; break; - case am_c_const: case am_atom: /* check that value fits in an unsigned imm32 */ /* XXX: are we sure it's not really a signed imm32? */ @@ -71,14 +72,19 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type) int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) { - Sint rel32; + Sint64 destOffset = (Sint64)destAddress - (Sint64)callAddress - 4; - ASSERT(trampoline == NULL); + if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L)) { + destOffset = ((Sint64)trampoline - 2) - (Sint64)callAddress - 4; - rel32 = (Sint)destAddress - (Sint)callAddress - 4; - if ((Sint)(Sint32)rel32 != rel32) - return -1; - *(Uint32*)callAddress = (Uint32)rel32; + if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L)) + return -1; + + *(void**)trampoline = destAddress; + hipe_flush_icache_word(trampoline); + } + + *(Uint32*)callAddress = (Uint32)destOffset; hipe_flush_icache_word(callAddress); return 0; } @@ -96,12 +102,70 @@ static void *alloc_code(unsigned int alloc_bytes) return erts_alloc(ERTS_ALC_T_HIPE_EXEC, alloc_bytes); } +static int check_callees(Eterm callees) +{ + Eterm *tuple; + Uint arity; + Uint i; + + if (is_not_tuple(callees)) + return -1; + tuple = tuple_val(callees); + arity = arityval(tuple[0]); + for (i = 1; i <= arity; ++i) { + Eterm mfa = tuple[i]; + if (is_atom(mfa)) + continue; + if (is_not_tuple(mfa) || + tuple_val(mfa)[0] != make_arityval(3) || + is_not_atom(tuple_val(mfa)[1]) || + is_not_atom(tuple_val(mfa)[2]) || + is_not_small(tuple_val(mfa)[3]) || + unsigned_val(tuple_val(mfa)[3]) > 255) + return -1; + } + return arity; +} + +#define TRAMPOLINE_BYTES 12 + +static void generate_trampolines(unsigned char *address, + 
int nrcallees, Eterm callees, + unsigned char **trampvec) +{ + unsigned char *trampoline = address; + int i; + + for(i = 0; i < nrcallees; ++i) { + trampoline[0] = 0x48; /* movabsq $..., %rax; */ + trampoline[1] = 0xb8; + *((Uint64*)(trampoline+2)) = 0; /* callee's address */ + trampoline[10] = 0xff; /* jmpq *%rax */ + trampoline[11] = 0xe0; + trampvec[i] = trampoline+2; + trampoline += TRAMPOLINE_BYTES; + } + hipe_flush_icache_range(address, nrcallees*TRAMPOLINE_BYTES); +} + void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) { - if (is_not_nil(callees)) + int nrcallees; + Eterm trampvecbin; + unsigned char **trampvec; + unsigned char *address; + + nrcallees = check_callees(callees); + if (nrcallees < 0) return NULL; - *trampolines = NIL; - return alloc_code(nrbytes); + + trampvecbin = new_binary(p, NULL, nrcallees*sizeof(unsigned char*)); + trampvec = (unsigned char **)binary_bytes(trampvecbin); + + address = alloc_code(nrbytes + nrcallees*TRAMPOLINE_BYTES); + generate_trampolines(address + nrbytes, nrcallees, callees, trampvec); + *trampolines = trampvecbin; + return address; } void hipe_free_code(void* code, unsigned int bytes) @@ -129,10 +193,9 @@ void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity) */ unsigned int codeSize; unsigned char *code, *codep; - unsigned int callEmuOffset; - codeSize = /* 23, 26, 29, or 32 bytes */ - 23 + /* 23 when all offsets are 8-bit */ + codeSize = /* 30, 33, 36, or 39 bytes */ + 30 + /* 30 when all offsets are 8-bit */ (P_CALLEE_EXP >= 128 ? 3 : 0) + ((P_CALLEE_EXP + 4) >= 128 ? 3 : 0) + (P_ARITY >= 128 ? 
3 : 0); @@ -197,14 +260,15 @@ void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity) codep[0] = beamArity; codep += 1; - /* jmp callemu; 5 bytes */ - callEmuOffset = (unsigned char*)nbif_callemu - (code + codeSize); - codep[0] = 0xe9; - codep[1] = callEmuOffset & 0xFF; - codep[2] = (callEmuOffset >> 8) & 0xFF; - codep[3] = (callEmuOffset >> 16) & 0xFF; - codep[4] = (callEmuOffset >> 24) & 0xFF; - codep += 5; + /* jmp callemu; 12 bytes */ + codep[0] = 0x48; + codep[1] = 0xb8; + codep += 2; + *(Uint64*)codep = (Uint64)nbif_callemu; + codep += 8; + codep[0] = 0xff; + codep[1] = 0xe0; + codep += 2; ASSERT(codep == code + codeSize); diff --git a/erts/emulator/hipe/hipe_bif0.c b/erts/emulator/hipe/hipe_bif0.c index e477c4cdea..a8be64e08d 100644 --- a/erts/emulator/hipe/hipe_bif0.c +++ b/erts/emulator/hipe/hipe_bif0.c @@ -1112,7 +1112,7 @@ static struct hipe_mfa_info* mod2mfa_put(struct hipe_mfa_info* mfa) struct hipe_ref { struct hipe_ref_head head; /* list of refs to same calleee */ void *address; -#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) void *trampoline; #endif unsigned int flags; @@ -1543,7 +1543,7 @@ BIF_RETTYPE hipe_bifs_add_ref_2(BIF_ALIST_2) ref = erts_alloc(ERTS_ALC_T_HIPE_LL, sizeof(struct hipe_ref)); ref->address = address; -#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) ref->trampoline = trampoline; #endif ref->flags = flags; @@ -1819,7 +1819,7 @@ void hipe_redirect_to_module(Module* modp) if (ref->flags & REF_FLAG_IS_LOAD_MFA) res = hipe_patch_insn(ref->address, (Uint)p->remote_address, am_load_mfa); else { -#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#if defined(__x86_64__) || 
defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) void* trampoline = ref->trampoline; #else void* trampoline = NULL; diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index 31e4f6e4ac..22947da148 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -646,7 +646,7 @@ conv_imm(Opnd, Map) -> is_imm64(Value) when is_integer(Value) -> (Value < -(1 bsl (32 - 1))) or (Value > (1 bsl (32 - 1)) - 1); is_imm64({_,atom}) -> false; % Atoms are 32 bits. -is_imm64({_,c_const}) -> false; % c_consts are 32 bits. +is_imm64({_,c_const}) -> true; % c_consts are 64 bits. is_imm64({_,_}) -> true . % Other relocs are 64 bits. -else. conv_imm(Opnd, Map) -> @@ -777,6 +777,18 @@ conv_fconv(Dst, Src) -> %%% Finalise the conversion of a 2-address FP operation. +-ifdef(HIPE_AMD64). +conv_fp_unary(Dst, Src, 'fchs') -> + Tmp = new_untagged_temp(), + case same_opnd(Dst, Src) of + true -> + []; + _ -> + [hipe_x86:mk_fmove(Src, Dst)] + end ++ + mk_load_address(c_const, hipe_x86:mk_imm({sse2_fnegate_mask, c_const}), Tmp) ++ + [hipe_x86:mk_fp_binop('xorpd', hipe_x86:mk_mem(Tmp, hipe_x86:mk_imm(0), double), Dst)]. +-else. conv_fp_unary(Dst, Src, FpUnOp) -> case same_opnd(Dst, Src) of true -> @@ -785,6 +797,7 @@ conv_fp_unary(Dst, Src, FpUnOp) -> [hipe_x86:mk_fmove(Src, Dst), hipe_x86:mk_fp_unop(FpUnOp, Dst)] end. +-endif. conv_fp_unop(RtlFpUnOp) -> case RtlFpUnOp of @@ -854,13 +867,8 @@ mk_jmp_switch(Index, JTabLab, Labels) -> %%% Finalise the translation of a load_address instruction. -ifdef(HIPE_AMD64). -mk_load_address(Type, Src, Dst) -> - case Type of - c_const -> % 32 bits - [hipe_x86:mk_move(Src, Dst)]; - _ -> - [hipe_x86:mk_move64(Src, Dst)] - end. +mk_load_address(_Type, Src, Dst) -> + [hipe_x86:mk_move64(Src, Dst)]. -else. mk_load_address(_Type, Src, Dst) -> [hipe_x86:mk_move(Src, Dst)]. 
diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl index 50919bdf4e..9d2586a14d 100644 --- a/lib/hipe/x86/hipe_x86_assemble.erl +++ b/lib/hipe/x86/hipe_x86_assemble.erl @@ -735,6 +735,7 @@ resolve_sse2_op(Op) -> fdiv -> divsd; fmul -> mulsd; fsub -> subsd; + xorpd -> xorpd; _ -> exit({?MODULE, unknown_sse2_operator, Op}) end. diff --git a/lib/kernel/src/hipe_unified_loader.erl b/lib/kernel/src/hipe_unified_loader.erl index f8199fcf71..fd06f0f7d8 100644 --- a/lib/kernel/src/hipe_unified_loader.erl +++ b/lib/kernel/src/hipe_unified_loader.erl @@ -275,6 +275,7 @@ needs_trampolines(Architecture) -> arm -> true; powerpc -> true; ppc64 -> true; + amd64 -> true; _ -> false end. -- cgit v1.2.3 From 84db627b0d7bbb581da1ddc192e5c85155a76a1f Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 29 Jan 2018 16:45:07 +0100 Subject: Update primary bootstrap for hipe_unified_loader --- bootstrap/lib/kernel/ebin/hipe_unified_loader.beam | Bin 12492 -> 12500 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam b/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam index 8ca0b915a2..3f5e59ec50 100644 Binary files a/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam and b/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam differ -- cgit v1.2.3 From 627a958104c50c9cb4a5022b29239b7d63fb5d76 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 29 Jan 2018 17:51:06 +0100 Subject: erts: Refactor hipe x86_64 trampolines --- erts/emulator/hipe/hipe_amd64.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/erts/emulator/hipe/hipe_amd64.c b/erts/emulator/hipe/hipe_amd64.c index bdeae23c7c..f23f341e6d 100644 --- a/erts/emulator/hipe/hipe_amd64.c +++ b/erts/emulator/hipe/hipe_amd64.c @@ -39,6 +39,8 @@ #undef ERL_FUN_SIZE #include "hipe_literals.h" +static void patch_trampoline(void *trampoline, void *destAddress); + const Uint sse2_fnegate_mask[2] = 
{0x8000000000000000,0}; void hipe_patch_load_fe(Uint64 *address, Uint64 value) @@ -75,13 +77,12 @@ int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) Sint64 destOffset = (Sint64)destAddress - (Sint64)callAddress - 4; if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L)) { - destOffset = ((Sint64)trampoline - 2) - (Sint64)callAddress - 4; + destOffset = (Sint64)trampoline - (Sint64)callAddress - 4; if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L)) return -1; - *(void**)trampoline = destAddress; - hipe_flush_icache_word(trampoline); + patch_trampoline(trampoline, destAddress); } *(Uint32*)callAddress = (Uint32)destOffset; @@ -139,15 +140,25 @@ static void generate_trampolines(unsigned char *address, for(i = 0; i < nrcallees; ++i) { trampoline[0] = 0x48; /* movabsq $..., %rax; */ trampoline[1] = 0xb8; - *((Uint64*)(trampoline+2)) = 0; /* callee's address */ + *(void**)(trampoline+2) = NULL; /* callee's address */ trampoline[10] = 0xff; /* jmpq *%rax */ trampoline[11] = 0xe0; - trampvec[i] = trampoline+2; + trampvec[i] = trampoline; trampoline += TRAMPOLINE_BYTES; } hipe_flush_icache_range(address, nrcallees*TRAMPOLINE_BYTES); } +static void patch_trampoline(void *trampoline, void *destAddress) +{ + unsigned char *tp = (unsigned char*) trampoline; + + ASSERT(tp[0] == 0x48 && tp[1] == 0xb8); + + *(void**)(tp+2) = destAddress; /* callee's address */ + hipe_flush_icache_word(tp+2); +} + void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) { int nrcallees; -- cgit v1.2.3