/* * %CopyrightBegin% * * Copyright Ericsson AB 2004-2016. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * %CopyrightEnd% */ #include <stddef.h> /* offsetof() */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "global.h" #include "erl_binary.h" #include "hipe_arch.h" #include "hipe_native_bif.h" /* nbif_callemu() */ #include "hipe_bif0.h" #if !defined(__powerpc64__) const unsigned int fconv_constant[2] = { 0x43300000, 0x80000000 }; #endif /* Flush dcache and invalidate icache for a range of addresses. */ void hipe_flush_icache_range(void *address, unsigned int nbytes) { const unsigned int L1_CACHE_SHIFT = 5; const unsigned long L1_CACHE_BYTES = 1 << L1_CACHE_SHIFT; unsigned long start, p; unsigned int nlines, n; if (!nbytes) return; start = (unsigned long)address & ~(L1_CACHE_BYTES-1); nlines = (((unsigned long)address & (L1_CACHE_BYTES-1)) + nbytes + (L1_CACHE_BYTES-1)) >> L1_CACHE_SHIFT; p = start; n = nlines; do { asm volatile("dcbst 0,%0" : : "r"(p) : "memory"); p += L1_CACHE_BYTES; } while (--n != 0); asm volatile("sync"); p = start; n = nlines; do { asm volatile("icbi 0,%0" : : "r"(p) : "memory"); p += L1_CACHE_BYTES; } while (--n != 0); asm volatile("sync\n\tisync"); } static int check_callees(Eterm callees) { Eterm *tuple; Uint arity; Uint i; if (is_not_tuple(callees)) return -1; tuple = tuple_val(callees); arity = arityval(tuple[0]); for (i = 1; i <= arity; ++i) { Eterm mfa = tuple[i]; if (is_not_tuple(mfa) || tuple_val(mfa)[0] != make_arityval(3) || is_not_atom(tuple_val(mfa)[1]) || is_not_atom(tuple_val(mfa)[2]) || is_not_small(tuple_val(mfa)[3]) || unsigned_val(tuple_val(mfa)[3]) > 255) return -1; } return arity; } static void generate_trampolines(Uint32* address, int nrcallees, Eterm callees, Uint32** trampvec) { Uint32* trampoline = address; int i; for (i = 0; i < nrcallees; ++i) { #if defined(__powerpc64__) # define TRAMPOLINE_WORDS 7 trampoline[0] = 0x3D600000; /* addis r11,r0,0 */ trampoline[1] = 0x616B0000; /* ori r11,r11,0 */ trampoline[2] = 0x796B07C6; /* rldicr r11,r11,32,31 */ trampoline[3] = 0x656B0000; /* oris r11,r11,0 */ trampoline[4] = 0x616B0000; /* ori r11,r11,0 */ trampoline[5] = 0x7D6903A6; /* mtctr r11 */ trampoline[6] = 0x4E800420; /* bctr */ #else # define TRAMPOLINE_WORDS 4 trampoline[0] = 0x39600000; /* addi r11,r0,0 */ trampoline[1] = 0x3D6B0000; /* addis r11,r11,0 */ trampoline[2] = 0x7D6903A6; /* mtctr r11 */ trampoline[3] = 0x4E800420; /* bctr */ #endif trampvec[i] = trampoline; trampoline += TRAMPOLINE_WORDS; } hipe_flush_icache_range(address, nrcallees*TRAMPOLINE_WORDS*sizeof(Uint32)); } void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) { Uint code_words; int nrcallees; Eterm trampvecbin; Uint32 **trampvec; Uint32 *address; if (nrbytes & 0x3) return NULL; code_words = nrbytes / sizeof(Uint32); nrcallees = check_callees(callees); if (nrcallees < 0) return NULL; trampvecbin = new_binary(p, NULL, nrcallees*sizeof(Uint32*)); trampvec = (Uint32**)binary_bytes(trampvecbin); address = erts_alloc(ERTS_ALC_T_HIPE_EXEC, (code_words + nrcallees*TRAMPOLINE_WORDS)*sizeof(Uint32)); generate_trampolines(address + code_words, nrcallees, callees, trampvec); *trampolines = trampvecbin; return address; } void hipe_free_code(void* code, unsigned int bytes) { erts_free(ERTS_ALC_T_HIPE_EXEC, code); } void hipe_free_native_stub(void* stub) { erts_free(ERTS_ALC_T_HIPE_EXEC, stub); } static void patch_imm16(Uint32 *address, unsigned int imm16) { unsigned int insn = *address; *address = (insn & ~0xFFFF) | (imm16 & 0xFFFF); hipe_flush_icache_word(address); } #if defined(__powerpc64__) /* * To load a 64-bit immediate value 'val' into Rd (Rd != R0): * * addis Rd, 0, val@highest // (val >> 48) & 0xFFFF * ori Rd, Rd, val@higher // (val >> 32) & 0xFFFF * rldicr Rd, Rd, 32, 31 * oris Rd, Rd, val@h // (val >> 16) & 0xFFFF * ori Rd, Rd, val@l // val & 0xFFFF */ static void patch_li64(Uint32 *address, Uint64 value) { patch_imm16(address+0, value >> 48); patch_imm16(address+1, value >> 32); /* rldicr Rd, Rd, 32, 31 */ patch_imm16(address+3, value >> 16); patch_imm16(address+4, value); } void hipe_patch_load_fe(Uint *address, Uint value) { patch_li64((Uint32*)address, value); } int hipe_patch_insn(void *address, Uint64 value, Eterm type) { switch (type) { case am_closure: case am_constant: case am_atom: case am_c_const: patch_li64((Uint32*)address, value); return 0; default: return -1; } } void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity) { Uint32 *code; if ((unsigned long)&nbif_callemu & ~0x01FFFFFCUL) abort(); code = erts_alloc(ERTS_ALC_T_HIPE_EXEC, 7*sizeof(Uint32)); if (!code) return NULL; /* addis r12,0,callee_exp@highest */ code[0] = 0x3d800000 | (((unsigned long)callee_exp >> 48) & 0xffff); /* ori r12,r12,callee_exp@higher */ code[1] = 0x618c0000 | (((unsigned long)callee_exp >> 32) & 0xffff); /* sldi r12,r12,32 (rldicr r12,r12,32,31) */ code[2] = 0x798c07c6; /* oris r12,r12,callee_exp@h */ code[3] = 0x658c0000 | (((unsigned long)callee_exp >> 16) & 0xffff); /* ori r12,r12,callee_exp@l */ code[4] = 0x618c0000 | ((unsigned long)callee_exp & 0xffff); /* addi r0,0,beamArity */ code[5] = 0x38000000 | (beamArity & 0x7FFF); /* ba nbif_callemu */ code[6] = 0x48000002 | (unsigned long)&nbif_callemu; hipe_flush_icache_range(code, 7*sizeof(Uint32)); return code; } #else /* !__powerpc64__ */ /* * To load a 32-bit immediate value 'val' into Rd (Rd != R0): * * addi Rd, 0, val@l // val & 0xFFFF * addis Rd, Rd, val@ha // ((val + 0x8000) >> 16) & 0xFFFF * * The first addi sign-extends the low 16 bits, so if * val&(1<<15), the high portion of Rd will be -1 not 0. * val@ha compensates by adding 1 if val&(1<<15). */ static unsigned int at_ha(unsigned int val) { return ((val + 0x8000) >> 16) & 0xFFFF; } static void patch_li(Uint32 *address, Uint32 value) { patch_imm16(address, value); patch_imm16(address+1, at_ha(value)); } void hipe_patch_load_fe(Uint32 *address, Uint value) { patch_li(address, value); } int hipe_patch_insn(void *address, Uint32 value, Eterm type) { switch (type) { case am_closure: case am_constant: case am_atom: case am_c_const: break; default: return -1; } patch_li((Uint32*)address, value); return 0; } void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity) { Uint32 *code; /* * Native code calls BEAM via a stub looking as follows: * * addi r12,0,callee_exp@l * addi r0,0,beamArity * addis r12,r12,callee_exp@ha * ba nbif_callemu * * I'm using r0 and r12 since the standard SVR4 ABI allows * them to be modified during function linkage. Trampolines * (for b/bl to distant targets) may modify r11. * * The runtime system code is linked completely below the * 32MB address boundary. Hence the branch to nbif_callemu * is done with a 'ba' instruction. */ /* verify that 'ba' can reach nbif_callemu */ if ((unsigned long)&nbif_callemu & ~0x01FFFFFCUL) abort(); code = erts_alloc(ERTS_ALC_T_HIPE_EXEC, 4*sizeof(Uint32)); if (!code) return NULL; /* addi r12,0,callee_exp@l */ code[0] = 0x39800000 | ((unsigned long)callee_exp & 0xFFFF); /* addi r0,0,beamArity */ code[1] = 0x38000000 | (beamArity & 0x7FFF); /* addis r12,r12,callee_exp@ha */ code[2] = 0x3D8C0000 | at_ha((unsigned long)callee_exp); /* ba nbif_callemu */ code[3] = 0x48000002 | (unsigned long)&nbif_callemu; hipe_flush_icache_range(code, 4*sizeof(Uint32)); return code; } #endif /* !__powerpc64__ */ static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA) { Uint32 oldI = *address; Uint32 newI = (oldI & 0xFC000001) | ((offset & 0x00FFFFFF) << 2) | (AA & 2); *address = newI; hipe_flush_icache_word(address); } int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) { if ((UWord)destAddress == ((UWord)destAddress & 0x01FFFFFC)) { /* The destination is in the [0,32MB[ range. We can reach it with a ba/bla instruction. This is the typical case for BIFs and primops. It's also common for trap-to-BEAM stubs (on ppc32). */ patch_b((Uint32*)callAddress, (Sint32)destAddress >> 2, 2); } else { SWord destOffset = ((SWord)destAddress - (SWord)callAddress) >> 2; if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) { /* The destination is within a [-32MB,+32MB[ range from us. We can reach it with a b/bl instruction. This is typical for nearby Erlang code. */ patch_b((Uint32*)callAddress, (Sint32)destOffset, 0); } else { /* The destination is too distant for b/bl/ba/bla. Must do a b/bl to the trampoline. */ SWord trampOffset = ((SWord)trampoline - (SWord)callAddress) >> 2; if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) { /* Update the trampoline's address computation. (May be redundant, but we can't tell.) */ #if defined(__powerpc64__) patch_li64((Uint32*)trampoline, (Uint64)destAddress); #else patch_li((Uint32*)trampoline, (Uint32)destAddress); #endif /* Update this call site. */ patch_b((Uint32*)callAddress, (Sint32)trampOffset, 0); } else return -1; } } return 0; } void hipe_arch_print_pcb(struct hipe_process_state *p) { #define U(n,x) \ printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "") U("nra ", nra); U("narity ", narity); #undef U }