diff options
-rw-r--r-- | bootstrap/lib/kernel/ebin/hipe_unified_loader.beam | bin | 12492 -> 12500 bytes | |||
-rw-r--r-- | erts/configure.in | 17 | ||||
-rw-r--r-- | erts/emulator/hipe/hipe_amd64.c | 117 | ||||
-rw-r--r-- | erts/emulator/hipe/hipe_bif0.c | 6 | ||||
-rw-r--r-- | lib/compiler/src/beam_type.erl | 8 | ||||
-rw-r--r-- | lib/compiler/test/beam_type_SUITE.erl | 33 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_rtl_to_x86.erl | 24 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_assemble.erl | 1 | ||||
-rw-r--r-- | lib/kernel/src/hipe_unified_loader.erl | 1 |
9 files changed, 154 insertions, 53 deletions
diff --git a/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam b/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam Binary files differindex 8ca0b915a2..3f5e59ec50 100644 --- a/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam +++ b/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam diff --git a/erts/configure.in b/erts/configure.in index f15bb56435..dd6c52b079 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -2749,23 +2749,6 @@ if test "$cross_compiling" != "yes" && test X${enable_hipe} != Xno; then fi fi -dnl Check to disable -fPIE and friends for HiPE on amd64 -if test X${enable_hipe} = Xyes && test X$ARCH = Xamd64; then - AC_TRY_COMPILE(, [#if defined(__pie__) || defined(__PIE__) - #error -fPIE is enabled by default - #endif], - [AC_MSG_NOTICE([No -fPIE enabled by default])], - [AC_MSG_WARN([Security feature -fPIE will be disabled for HiPE]) - STATIC_CFLAGS="-fno-PIE $STATIC_CFLAGS" - saved_LDFLAGS=$LDFLAGS - LDFLAGS="-no-pie $LDFLAGS" - AC_TRY_LINK(,, [], - [LDFLAGS="-fno-PIE $saved_LDFLAGS" - AC_TRY_LINK(,, [], - [AC_MSG_WARN([Linked does not accept option -no-pie nor -fno-PIE]) - LDFLAGS=$saved_LDFLAGS])])]) -fi - if test X${enable_hipe} = Xyes; then case $OPSYS in linux) diff --git a/erts/emulator/hipe/hipe_amd64.c b/erts/emulator/hipe/hipe_amd64.c index e3cff4a4ba..f23f341e6d 100644 --- a/erts/emulator/hipe/hipe_amd64.c +++ b/erts/emulator/hipe/hipe_amd64.c @@ -28,6 +28,7 @@ #include "error.h" #include "bif.h" #include "big.h" /* term_to_Sint() */ +#include "erl_binary.h" #include "hipe_arch.h" #include "hipe_bif0.h" @@ -38,6 +39,8 @@ #undef ERL_FUN_SIZE #include "hipe_literals.h" +static void patch_trampoline(void *trampoline, void *destAddress); + const Uint sse2_fnegate_mask[2] = {0x8000000000000000,0}; void hipe_patch_load_fe(Uint64 *address, Uint64 value) @@ -52,9 +55,9 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type) switch (type) { case am_closure: case am_constant: + case am_c_const: *(Uint64*)address = value; break; - case am_c_const: case am_atom: /* check that value fits in an unsigned imm32 */ /* XXX: are we sure it's not really a signed imm32? */ @@ -71,14 +74,18 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type) int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) { - Sint rel32; + Sint64 destOffset = (Sint64)destAddress - (Sint64)callAddress - 4; - ASSERT(trampoline == NULL); + if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L)) { + destOffset = (Sint64)trampoline - (Sint64)callAddress - 4; - rel32 = (Sint)destAddress - (Sint)callAddress - 4; - if ((Sint)(Sint32)rel32 != rel32) - return -1; - *(Uint32*)callAddress = (Uint32)rel32; + if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L)) + return -1; + + patch_trampoline(trampoline, destAddress); + } + + *(Uint32*)callAddress = (Uint32)destOffset; hipe_flush_icache_word(callAddress); return 0; } @@ -96,12 +103,80 @@ static void *alloc_code(unsigned int alloc_bytes) return erts_alloc(ERTS_ALC_T_HIPE_EXEC, alloc_bytes); } +static int check_callees(Eterm callees) +{ + Eterm *tuple; + Uint arity; + Uint i; + + if (is_not_tuple(callees)) + return -1; + tuple = tuple_val(callees); + arity = arityval(tuple[0]); + for (i = 1; i <= arity; ++i) { + Eterm mfa = tuple[i]; + if (is_atom(mfa)) + continue; + if (is_not_tuple(mfa) || + tuple_val(mfa)[0] != make_arityval(3) || + is_not_atom(tuple_val(mfa)[1]) || + is_not_atom(tuple_val(mfa)[2]) || + is_not_small(tuple_val(mfa)[3]) || + unsigned_val(tuple_val(mfa)[3]) > 255) + return -1; + } + return arity; +} + +#define TRAMPOLINE_BYTES 12 + +static void generate_trampolines(unsigned char *address, + int nrcallees, Eterm callees, + unsigned char **trampvec) +{ + unsigned char *trampoline = address; + int i; + + for(i = 0; i < nrcallees; ++i) { + trampoline[0] = 0x48; /* movabsq $..., %rax; */ + trampoline[1] = 0xb8; + *(void**)(trampoline+2) = NULL; /* callee's address */ + trampoline[10] = 0xff; /* jmpq *%rax */ + trampoline[11] = 0xe0; + trampvec[i] = trampoline; + trampoline += TRAMPOLINE_BYTES; + } + hipe_flush_icache_range(address, nrcallees*TRAMPOLINE_BYTES); +} + +static void patch_trampoline(void *trampoline, void *destAddress) +{ + unsigned char *tp = (unsigned char*) trampoline; + + ASSERT(tp[0] == 0x48 && tp[1] == 0xb8); + + *(void**)(tp+2) = destAddress; /* callee's address */ + hipe_flush_icache_word(tp+2); +} + void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) { - if (is_not_nil(callees)) + int nrcallees; + Eterm trampvecbin; + unsigned char **trampvec; + unsigned char *address; + + nrcallees = check_callees(callees); + if (nrcallees < 0) return NULL; - *trampolines = NIL; - return alloc_code(nrbytes); + + trampvecbin = new_binary(p, NULL, nrcallees*sizeof(unsigned char*)); + trampvec = (unsigned char **)binary_bytes(trampvecbin); + + address = alloc_code(nrbytes + nrcallees*TRAMPOLINE_BYTES); + generate_trampolines(address + nrbytes, nrcallees, callees, trampvec); + *trampolines = trampvecbin; + return address; } void hipe_free_code(void* code, unsigned int bytes) @@ -129,10 +204,9 @@ void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity) */ unsigned int codeSize; unsigned char *code, *codep; - unsigned int callEmuOffset; - codeSize = /* 23, 26, 29, or 32 bytes */ - 23 + /* 23 when all offsets are 8-bit */ + codeSize = /* 30, 33, 36, or 39 bytes */ + 30 + /* 30 when all offsets are 8-bit */ (P_CALLEE_EXP >= 128 ? 3 : 0) + ((P_CALLEE_EXP + 4) >= 128 ? 3 : 0) + (P_ARITY >= 128 ? 3 : 0); @@ -197,14 +271,15 @@ void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity) codep[0] = beamArity; codep += 1; - /* jmp callemu; 5 bytes */ - callEmuOffset = (unsigned char*)nbif_callemu - (code + codeSize); - codep[0] = 0xe9; - codep[1] = callEmuOffset & 0xFF; - codep[2] = (callEmuOffset >> 8) & 0xFF; - codep[3] = (callEmuOffset >> 16) & 0xFF; - codep[4] = (callEmuOffset >> 24) & 0xFF; - codep += 5; + /* jmp callemu; 12 bytes */ + codep[0] = 0x48; + codep[1] = 0xb8; + codep += 2; + *(Uint64*)codep = (Uint64)nbif_callemu; + codep += 8; + codep[0] = 0xff; + codep[1] = 0xe0; + codep += 2; ASSERT(codep == code + codeSize); diff --git a/erts/emulator/hipe/hipe_bif0.c b/erts/emulator/hipe/hipe_bif0.c index e477c4cdea..a8be64e08d 100644 --- a/erts/emulator/hipe/hipe_bif0.c +++ b/erts/emulator/hipe/hipe_bif0.c @@ -1112,7 +1112,7 @@ static struct hipe_mfa_info* mod2mfa_put(struct hipe_mfa_info* mfa) struct hipe_ref { struct hipe_ref_head head; /* list of refs to same calleee */ void *address; -#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) void *trampoline; #endif unsigned int flags; @@ -1543,7 +1543,7 @@ BIF_RETTYPE hipe_bifs_add_ref_2(BIF_ALIST_2) ref = erts_alloc(ERTS_ALC_T_HIPE_LL, sizeof(struct hipe_ref)); ref->address = address; -#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) ref->trampoline = trampoline; #endif ref->flags = flags; @@ -1819,7 +1819,7 @@ void hipe_redirect_to_module(Module* modp) if (ref->flags & REF_FLAG_IS_LOAD_MFA) res = hipe_patch_insn(ref->address, (Uint)p->remote_address, am_load_mfa); else { -#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) void* trampoline = ref->trampoline; #else void* trampoline = NULL; diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl index b2fabed2c5..a2e3af37d0 100644 --- a/lib/compiler/src/beam_type.erl +++ b/lib/compiler/src/beam_type.erl @@ -943,10 +943,10 @@ merge_type_info({tuple,SzKind1,Sz1,[]}, {tuple,_SzKind2,_Sz2,First}=Tuple2) -> merge_type_info({tuple,SzKind1,Sz1,First}, Tuple2); merge_type_info({tuple,_SzKind1,_Sz1,First}=Tuple1, {tuple,SzKind2,Sz2,_}) -> merge_type_info(Tuple1, {tuple,SzKind2,Sz2,First}); -merge_type_info(integer, {integer,_}=Int) -> - Int; -merge_type_info({integer,_}=Int, integer) -> - Int; +merge_type_info(integer, {integer,_}) -> + integer; +merge_type_info({integer,_}, integer) -> + integer; merge_type_info({integer,{Min1,Max1}}, {integer,{Min2,Max2}}) -> {integer,{max(Min1, Min2),min(Max1, Max2)}}; merge_type_info({binary,U1}, {binary,U2}) -> diff --git a/lib/compiler/test/beam_type_SUITE.erl b/lib/compiler/test/beam_type_SUITE.erl index dfbf2aa4a0..e33df809ff 100644 --- a/lib/compiler/test/beam_type_SUITE.erl +++ b/lib/compiler/test/beam_type_SUITE.erl @@ -67,6 +67,15 @@ integers(_Config) -> college = do_integers_3(), + zero = do_integers_4(<<0:1>>, 0), + one = do_integers_4(<<1:1>>, 0), + other = do_integers_4(<<1:1>>, 2), + + zero = do_integers_5(0, 0), + one = do_integers_5(0, 1), + two = do_integers_5(0, 2), + three = do_integers_5(0, 3), + ok. do_integers_1(B0) -> @@ -89,6 +98,30 @@ do_integers_3() -> 1 -> 0 end. +do_integers_4(<<X:1,T/bits>>, C) -> + %% Binary matching gives the range 0-1 for X. + %% The range for `X bor C` is unknown. It must not be inherited + %% from X. (`X bor C` will reuse the register used for X.) + case X bor C of + 0 -> do_integers_4(T, C, zero); + 1 -> do_integers_4(T, C, one); + _ -> do_integers_4(T, C, other) + end. + +do_integers_4(_, _, Res) -> + Res. + +do_integers_5(X0, Y0) -> + %% X and Y will use the same register. + X = X0 band 1, + Y = Y0 band 3, + case Y of + 0 -> zero; + 1 -> one; + 2 -> two; + 3 -> three + end. + coverage(_Config) -> {'EXIT',{badarith,_}} = (catch id(1) bsl 0.5), {'EXIT',{badarith,_}} = (catch id(2.0) bsl 2), diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index 31e4f6e4ac..22947da148 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -646,7 +646,7 @@ conv_imm(Opnd, Map) -> is_imm64(Value) when is_integer(Value) -> (Value < -(1 bsl (32 - 1))) or (Value > (1 bsl (32 - 1)) - 1); is_imm64({_,atom}) -> false; % Atoms are 32 bits. -is_imm64({_,c_const}) -> false; % c_consts are 32 bits. +is_imm64({_,c_const}) -> true; % c_consts are 64 bits. is_imm64({_,_}) -> true . % Other relocs are 64 bits. -else. conv_imm(Opnd, Map) -> @@ -777,6 +777,18 @@ conv_fconv(Dst, Src) -> %%% Finalise the conversion of a 2-address FP operation. +-ifdef(HIPE_AMD64). +conv_fp_unary(Dst, Src, 'fchs') -> + Tmp = new_untagged_temp(), + case same_opnd(Dst, Src) of + true -> + []; + _ -> + [hipe_x86:mk_fmove(Src, Dst)] + end ++ + mk_load_address(c_const, hipe_x86:mk_imm({sse2_fnegate_mask, c_const}), Tmp) ++ + [hipe_x86:mk_fp_binop('xorpd', hipe_x86:mk_mem(Tmp, hipe_x86:mk_imm(0), double), Dst)]. +-else. conv_fp_unary(Dst, Src, FpUnOp) -> case same_opnd(Dst, Src) of true -> @@ -785,6 +797,7 @@ conv_fp_unary(Dst, Src, FpUnOp) -> [hipe_x86:mk_fmove(Src, Dst), hipe_x86:mk_fp_unop(FpUnOp, Dst)] end. +-endif. conv_fp_unop(RtlFpUnOp) -> case RtlFpUnOp of @@ -854,13 +867,8 @@ mk_jmp_switch(Index, JTabLab, Labels) -> %%% Finalise the translation of a load_address instruction. -ifdef(HIPE_AMD64). -mk_load_address(Type, Src, Dst) -> - case Type of - c_const -> % 32 bits - [hipe_x86:mk_move(Src, Dst)]; - _ -> - [hipe_x86:mk_move64(Src, Dst)] - end. +mk_load_address(_Type, Src, Dst) -> + [hipe_x86:mk_move64(Src, Dst)]. -else. mk_load_address(_Type, Src, Dst) -> [hipe_x86:mk_move(Src, Dst)]. diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl index 50919bdf4e..9d2586a14d 100644 --- a/lib/hipe/x86/hipe_x86_assemble.erl +++ b/lib/hipe/x86/hipe_x86_assemble.erl @@ -735,6 +735,7 @@ resolve_sse2_op(Op) -> fdiv -> divsd; fmul -> mulsd; fsub -> subsd; + xorpd -> xorpd; _ -> exit({?MODULE, unknown_sse2_operator, Op}) end. diff --git a/lib/kernel/src/hipe_unified_loader.erl b/lib/kernel/src/hipe_unified_loader.erl index f8199fcf71..fd06f0f7d8 100644 --- a/lib/kernel/src/hipe_unified_loader.erl +++ b/lib/kernel/src/hipe_unified_loader.erl @@ -275,6 +275,7 @@ needs_trampolines(Architecture) -> arm -> true; powerpc -> true; ppc64 -> true; + amd64 -> true; _ -> false end. |