diff options
| -rw-r--r-- | bootstrap/lib/kernel/ebin/hipe_unified_loader.beam | bin | 12492 -> 12500 bytes | |||
| -rw-r--r-- | erts/configure.in | 17 | ||||
| -rw-r--r-- | erts/emulator/hipe/hipe_amd64.c | 117 | ||||
| -rw-r--r-- | erts/emulator/hipe/hipe_bif0.c | 6 | ||||
| -rw-r--r-- | lib/hipe/x86/hipe_rtl_to_x86.erl | 24 | ||||
| -rw-r--r-- | lib/hipe/x86/hipe_x86_assemble.erl | 1 | ||||
| -rw-r--r-- | lib/kernel/src/hipe_unified_loader.erl | 1 | 
7 files changed, 117 insertions, 49 deletions
| diff --git a/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam b/bootstrap/lib/kernel/ebin/hipe_unified_loader.beamBinary files differ index 8ca0b915a2..3f5e59ec50 100644 --- a/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam +++ b/bootstrap/lib/kernel/ebin/hipe_unified_loader.beam diff --git a/erts/configure.in b/erts/configure.in index f15bb56435..dd6c52b079 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -2749,23 +2749,6 @@ if test "$cross_compiling" != "yes" && test X${enable_hipe} != Xno; then    fi  fi -dnl Check to disable -fPIE and friends for HiPE on amd64 -if test X${enable_hipe} = Xyes && test X$ARCH = Xamd64; then -   AC_TRY_COMPILE(, [#if defined(__pie__) || defined(__PIE__) -		     #error -fPIE is enabled by default -		     #endif], -		    [AC_MSG_NOTICE([No -fPIE enabled by default])], -		    [AC_MSG_WARN([Security feature -fPIE will be disabled for HiPE]) -		     STATIC_CFLAGS="-fno-PIE $STATIC_CFLAGS" -		     saved_LDFLAGS=$LDFLAGS -		     LDFLAGS="-no-pie $LDFLAGS" -		     AC_TRY_LINK(,, [], -			[LDFLAGS="-fno-PIE $saved_LDFLAGS" -			 AC_TRY_LINK(,, [], -			    [AC_MSG_WARN([Linked does not accept option -no-pie nor -fno-PIE]) -			     LDFLAGS=$saved_LDFLAGS])])]) -fi -  if test X${enable_hipe} = Xyes; then     case $OPSYS in  	linux) diff --git a/erts/emulator/hipe/hipe_amd64.c b/erts/emulator/hipe/hipe_amd64.c index e3cff4a4ba..f23f341e6d 100644 --- a/erts/emulator/hipe/hipe_amd64.c +++ b/erts/emulator/hipe/hipe_amd64.c @@ -28,6 +28,7 @@  #include "error.h"  #include "bif.h"  #include "big.h"	/* term_to_Sint() */ +#include "erl_binary.h"  #include "hipe_arch.h"  #include "hipe_bif0.h" @@ -38,6 +39,8 @@  #undef ERL_FUN_SIZE  #include "hipe_literals.h" +static void patch_trampoline(void *trampoline, void *destAddress); +  const Uint sse2_fnegate_mask[2] = {0x8000000000000000,0};  void hipe_patch_load_fe(Uint64 *address, Uint64 value) @@ -52,9 +55,9 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type)      switch (type) {        case am_closure:        case am_constant: +      case am_c_const:  	*(Uint64*)address = value;  	break; -      case am_c_const:        case am_atom:  	/* check that value fits in an unsigned imm32 */  	/* XXX: are we sure it's not really a signed imm32? */ @@ -71,14 +74,18 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type)  int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)  { -    Sint rel32; +    Sint64 destOffset = (Sint64)destAddress - (Sint64)callAddress - 4; -    ASSERT(trampoline == NULL); +    if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L)) { +        destOffset = (Sint64)trampoline - (Sint64)callAddress - 4; -    rel32 = (Sint)destAddress - (Sint)callAddress - 4; -    if ((Sint)(Sint32)rel32 != rel32) -	return -1; -    *(Uint32*)callAddress = (Uint32)rel32; +        if ((destOffset < -0x80000000L) || (destOffset >= 0x80000000L)) +            return -1; + +        patch_trampoline(trampoline, destAddress); +    } + +    *(Uint32*)callAddress = (Uint32)destOffset;      hipe_flush_icache_word(callAddress);      return 0;  } @@ -96,12 +103,80 @@ static void *alloc_code(unsigned int alloc_bytes)      return erts_alloc(ERTS_ALC_T_HIPE_EXEC, alloc_bytes);  } +static int check_callees(Eterm callees) +{ +    Eterm *tuple; +    Uint arity; +    Uint i; + +    if (is_not_tuple(callees)) +	return -1; +    tuple = tuple_val(callees); +    arity = arityval(tuple[0]); +    for (i = 1; i <= arity; ++i) { +	Eterm mfa = tuple[i]; +	if (is_atom(mfa)) +	    continue; +	if (is_not_tuple(mfa) || +	    tuple_val(mfa)[0] != make_arityval(3) || +	    is_not_atom(tuple_val(mfa)[1]) || +	    is_not_atom(tuple_val(mfa)[2]) || +	    is_not_small(tuple_val(mfa)[3]) || +	    unsigned_val(tuple_val(mfa)[3]) > 255) +	    return -1; +    } +    return arity; +} + +#define TRAMPOLINE_BYTES 12 + +static void generate_trampolines(unsigned char *address, +                                 int nrcallees, Eterm callees, +                                 unsigned char **trampvec) +{ +    unsigned char *trampoline = address; +    int i; + +    for(i = 0; i < nrcallees; ++i) { +        trampoline[0] = 0x48;           /* movabsq $..., %rax; */ +        trampoline[1] = 0xb8; +        *(void**)(trampoline+2) = NULL; /* callee's address */ +        trampoline[10] = 0xff;          /* jmpq *%rax */ +        trampoline[11] = 0xe0; +        trampvec[i] = trampoline; +        trampoline += TRAMPOLINE_BYTES; +    } +    hipe_flush_icache_range(address, nrcallees*TRAMPOLINE_BYTES); +} + +static void patch_trampoline(void *trampoline, void *destAddress) +{ +    unsigned char *tp = (unsigned char*) trampoline; + +    ASSERT(tp[0] == 0x48 && tp[1] == 0xb8); + +    *(void**)(tp+2) = destAddress; /* callee's address */ +    hipe_flush_icache_word(tp+2); +} +  void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)  { -    if (is_not_nil(callees)) +    int nrcallees; +    Eterm trampvecbin; +    unsigned char **trampvec; +    unsigned char *address; + +    nrcallees = check_callees(callees); +    if (nrcallees < 0)  	return NULL; -    *trampolines = NIL; -    return alloc_code(nrbytes); + +    trampvecbin = new_binary(p, NULL, nrcallees*sizeof(unsigned char*)); +    trampvec = (unsigned char **)binary_bytes(trampvecbin); + +    address = alloc_code(nrbytes + nrcallees*TRAMPOLINE_BYTES); +    generate_trampolines(address + nrbytes, nrcallees, callees, trampvec); +    *trampolines = trampvecbin; +    return address;  }  void hipe_free_code(void* code, unsigned int bytes) @@ -129,10 +204,9 @@ void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity)       */      unsigned int codeSize;      unsigned char *code, *codep; -    unsigned int callEmuOffset; -    codeSize =	/* 23, 26, 29, or 32 bytes */ -      23 +	/* 23 when all offsets are 8-bit */ +    codeSize =	/* 30, 33, 36, or 39 bytes */ +      30 +	/* 30 when all offsets are 8-bit */        (P_CALLEE_EXP >= 128 ? 3 : 0) +        ((P_CALLEE_EXP + 4) >= 128 ? 3 : 0) +        (P_ARITY >= 128 ? 3 : 0); @@ -197,14 +271,15 @@ void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity)      codep[0] = beamArity;      codep += 1; -    /* jmp callemu; 5 bytes */ -    callEmuOffset = (unsigned char*)nbif_callemu - (code + codeSize); -    codep[0] = 0xe9; -    codep[1] =  callEmuOffset        & 0xFF; -    codep[2] = (callEmuOffset >>  8) & 0xFF; -    codep[3] = (callEmuOffset >> 16) & 0xFF; -    codep[4] = (callEmuOffset >> 24) & 0xFF; -    codep += 5; +    /* jmp callemu; 12 bytes */ +    codep[0] = 0x48; +    codep[1] = 0xb8; +    codep += 2; +    *(Uint64*)codep = (Uint64)nbif_callemu; +    codep += 8; +    codep[0] = 0xff; +    codep[1] = 0xe0; +    codep += 2;      ASSERT(codep == code + codeSize); diff --git a/erts/emulator/hipe/hipe_bif0.c b/erts/emulator/hipe/hipe_bif0.c index e477c4cdea..a8be64e08d 100644 --- a/erts/emulator/hipe/hipe_bif0.c +++ b/erts/emulator/hipe/hipe_bif0.c @@ -1112,7 +1112,7 @@ static struct hipe_mfa_info* mod2mfa_put(struct hipe_mfa_info* mfa)  struct hipe_ref {      struct hipe_ref_head head;    /* list of refs to same calleee */      void *address; -#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)      void *trampoline;  #endif      unsigned int flags; @@ -1543,7 +1543,7 @@ BIF_RETTYPE hipe_bifs_add_ref_2(BIF_ALIST_2)      ref = erts_alloc(ERTS_ALC_T_HIPE_LL, sizeof(struct hipe_ref));      ref->address = address; -#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)      ref->trampoline = trampoline;  #endif      ref->flags = flags; @@ -1819,7 +1819,7 @@ void hipe_redirect_to_module(Module* modp)  	    if (ref->flags & REF_FLAG_IS_LOAD_MFA)  		res = hipe_patch_insn(ref->address, (Uint)p->remote_address, am_load_mfa);  	    else { -#if defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)                  void* trampoline = ref->trampoline;  #else                  void* trampoline = NULL; diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index 31e4f6e4ac..22947da148 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -646,7 +646,7 @@ conv_imm(Opnd, Map) ->  is_imm64(Value) when is_integer(Value) ->    (Value < -(1 bsl (32 - 1))) or (Value > (1 bsl (32 - 1)) - 1);  is_imm64({_,atom})    -> false; % Atoms are 32 bits. -is_imm64({_,c_const}) -> false; % c_consts are 32 bits. +is_imm64({_,c_const}) -> true;  % c_consts are 64 bits.  is_imm64({_,_})       -> true . % Other relocs are 64 bits.  -else.  conv_imm(Opnd, Map) -> @@ -777,6 +777,18 @@ conv_fconv(Dst, Src) ->  %%% Finalise the conversion of a 2-address FP operation. +-ifdef(HIPE_AMD64). +conv_fp_unary(Dst, Src, 'fchs') -> +  Tmp = new_untagged_temp(), +  case same_opnd(Dst, Src) of +    true -> +      []; +    _ -> +      [hipe_x86:mk_fmove(Src, Dst)] +  end ++ +    mk_load_address(c_const, hipe_x86:mk_imm({sse2_fnegate_mask, c_const}), Tmp) ++ +    [hipe_x86:mk_fp_binop('xorpd', hipe_x86:mk_mem(Tmp, hipe_x86:mk_imm(0), double), Dst)]. +-else.  conv_fp_unary(Dst, Src, FpUnOp) ->    case same_opnd(Dst, Src) of      true -> @@ -785,6 +797,7 @@ conv_fp_unary(Dst, Src, FpUnOp) ->        [hipe_x86:mk_fmove(Src, Dst),         hipe_x86:mk_fp_unop(FpUnOp, Dst)]    end. +-endif.  conv_fp_unop(RtlFpUnOp) ->    case RtlFpUnOp of @@ -854,13 +867,8 @@ mk_jmp_switch(Index, JTabLab, Labels) ->  %%% Finalise the translation of a load_address instruction.  -ifdef(HIPE_AMD64). -mk_load_address(Type, Src, Dst) -> -  case Type of -    c_const -> % 32 bits -      [hipe_x86:mk_move(Src, Dst)]; -    _ -> -      [hipe_x86:mk_move64(Src, Dst)] -  end. +mk_load_address(_Type, Src, Dst) -> +  [hipe_x86:mk_move64(Src, Dst)].  -else.  mk_load_address(_Type, Src, Dst) ->    [hipe_x86:mk_move(Src, Dst)]. diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl index 50919bdf4e..9d2586a14d 100644 --- a/lib/hipe/x86/hipe_x86_assemble.erl +++ b/lib/hipe/x86/hipe_x86_assemble.erl @@ -735,6 +735,7 @@ resolve_sse2_op(Op) ->      fdiv -> divsd;      fmul -> mulsd;      fsub -> subsd; +    xorpd -> xorpd;      _ -> exit({?MODULE, unknown_sse2_operator, Op})    end. diff --git a/lib/kernel/src/hipe_unified_loader.erl b/lib/kernel/src/hipe_unified_loader.erl index f8199fcf71..fd06f0f7d8 100644 --- a/lib/kernel/src/hipe_unified_loader.erl +++ b/lib/kernel/src/hipe_unified_loader.erl @@ -275,6 +275,7 @@ needs_trampolines(Architecture) ->      arm -> true;      powerpc -> true;      ppc64 -> true; +    amd64 -> true;      _ -> false    end. | 
