/* * %CopyrightBegin% * * Copyright Ericsson AB 2005-2009. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in * compliance with the License. You should have received a copy of the * Erlang Public License along with this software. If not, it can be * retrieved online at http://www.erlang.org/. * * Software distributed under the License is distributed on an "AS IS" * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See * the License for the specific language governing rights and limitations * under the License. * * %CopyrightEnd% */ /* $Id$ */ #include <stddef.h> /* offsetof() */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "global.h" #include "erl_binary.h" #include <sys/mman.h> #include "hipe_arch.h" #include "hipe_native_bif.h" /* nbif_callemu() */ #include "hipe_bif0.h" /* Flush dcache and invalidate icache for a range of addresses. */ void hipe_flush_icache_range(void *address, unsigned int nbytes) { #if defined(__ARM_EABI__) register unsigned long beg __asm__("r0") = (unsigned long)address; register unsigned long end __asm__("r1") = (unsigned long)address + nbytes; register unsigned long flg __asm__("r2") = 0; register unsigned long scno __asm__("r7") = 0xf0002; __asm__ __volatile__("swi 0" /* sys_cacheflush() */ : "=r"(beg) : "0"(beg), "r"(end), "r"(flg), "r"(scno)); #else register unsigned long beg __asm__("r0") = (unsigned long)address; register unsigned long end __asm__("r1") = (unsigned long)address + nbytes; register unsigned long flg __asm__("r2") = 0; __asm__ __volatile__("swi 0x9f0002" /* sys_cacheflush() */ : "=r"(beg) : "0"(beg), "r"(end), "r"(flg)); #endif } void hipe_flush_icache_word(void *address) { hipe_flush_icache_range(address, 4); } /* * Management of 32MB code segments for regular code and trampolines. */ #define SEGMENT_NRBYTES (32*1024*1024) /* named constant, _not_ a tunable */ static struct segment { unsigned int *base; /* [base,base+32MB[ */ unsigned int *code_pos; /* INV: base <= code_pos <= tramp_pos */ unsigned int *tramp_pos; /* INV: tramp_pos <= base+32MB */ /* On ARM we always allocate a trampoline at base+32MB-8 for nbif_callemu, so tramp_pos <= base+32MB-8. */ } curseg; #define in_area(ptr,start,nbytes) \ ((unsigned long)((char*)(ptr) - (char*)(start)) < (nbytes)) static void *new_code_mapping(void) { return mmap(0, SEGMENT_NRBYTES, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); } static int check_callees(Eterm callees) { Eterm *tuple; Uint arity; Uint i; if (is_not_tuple(callees)) return -1; tuple = tuple_val(callees); arity = arityval(tuple[0]); for (i = 1; i <= arity; ++i) { Eterm mfa = tuple[i]; if (is_atom(mfa)) continue; if (is_not_tuple(mfa) || tuple_val(mfa)[0] != make_arityval(3) || is_not_atom(tuple_val(mfa)[1]) || is_not_atom(tuple_val(mfa)[2]) || is_not_small(tuple_val(mfa)[3]) || unsigned_val(tuple_val(mfa)[3]) > 255) return -1; } return arity; } static unsigned int *try_alloc(Uint nrwords, int nrcallees, Eterm callees, unsigned int **trampvec) { unsigned int *base, *address, *tramp_pos, nrfreewords; int trampnr; Eterm mfa, m, f; unsigned int a, *trampoline; m = NIL; f = NIL; a = 0; /* silence stupid compiler warning */ tramp_pos = curseg.tramp_pos; address = curseg.code_pos; nrfreewords = tramp_pos - address; if (nrwords > nrfreewords) return NULL; curseg.code_pos = address + nrwords; nrfreewords -= nrwords; base = curseg.base; for (trampnr = 1; trampnr <= nrcallees; ++trampnr) { mfa = tuple_val(callees)[trampnr]; if (is_atom(mfa)) trampoline = hipe_primop_get_trampoline(mfa); else { m = tuple_val(mfa)[1]; f = tuple_val(mfa)[2]; a = unsigned_val(tuple_val(mfa)[3]); trampoline = hipe_mfa_get_trampoline(m, f, a); } if (!in_area(trampoline, base, SEGMENT_NRBYTES)) { if (nrfreewords < 2) return NULL; nrfreewords -= 2; tramp_pos = trampoline = tramp_pos - 2; trampoline[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */ trampoline[1] = 0; /* callee's address */ hipe_flush_icache_range(trampoline, 2*sizeof(int)); if (is_atom(mfa)) hipe_primop_set_trampoline(mfa, trampoline); else hipe_mfa_set_trampoline(m, f, a, trampoline); } trampvec[trampnr-1] = trampoline; } curseg.tramp_pos = tramp_pos; return address; } void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) { Uint nrwords; int nrcallees; Eterm trampvecbin; unsigned int **trampvec; unsigned int *address; unsigned int *base; struct segment oldseg; if (nrbytes & 0x3) return NULL; nrwords = nrbytes >> 2; nrcallees = check_callees(callees); if (nrcallees < 0) return NULL; trampvecbin = new_binary(p, NULL, nrcallees*sizeof(unsigned int*)); trampvec = (unsigned int**)binary_bytes(trampvecbin); address = try_alloc(nrwords, nrcallees, callees, trampvec); if (!address) { base = new_code_mapping(); if (base == MAP_FAILED) return NULL; oldseg = curseg; curseg.base = base; curseg.code_pos = base; curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES); #if defined(__arm__) curseg.tramp_pos -= 2; curseg.tramp_pos[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */ curseg.tramp_pos[1] = (unsigned int)&nbif_callemu; #endif address = try_alloc(nrwords, nrcallees, callees, trampvec); if (!address) { munmap(base, SEGMENT_NRBYTES); curseg = oldseg; return NULL; } /* commit to new segment, ignore leftover space in old segment */ } *trampolines = trampvecbin; return address; } static unsigned int *alloc_stub(Uint nrwords, unsigned int **tramp_callemu) { unsigned int *address; unsigned int *base; struct segment oldseg; address = try_alloc(nrwords, 0, NIL, NULL); if (!address) { base = new_code_mapping(); if (base == MAP_FAILED) return NULL; oldseg = curseg; curseg.base = base; curseg.code_pos = base; curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES); #if defined(__arm__) curseg.tramp_pos -= 2; curseg.tramp_pos[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */ curseg.tramp_pos[1] = (unsigned int)&nbif_callemu; #endif address = try_alloc(nrwords, 0, NIL, NULL); if (!address) { munmap(base, SEGMENT_NRBYTES); curseg = oldseg; return NULL; } /* commit to new segment, ignore leftover space in old segment */ } *tramp_callemu = (unsigned int*)((char*)curseg.base + SEGMENT_NRBYTES) - 2; return address; } /* * ARMv5's support for 32-bit immediates is effectively non-existent. * Hence, every 32-bit immediate is stored in memory and loaded via * a PC-relative addressing mode. Relocation entries refer to those * data words, NOT the load instructions, so patching is trivial. */ static void patch_imm32(Uint32 *address, unsigned int imm32) { *address = imm32; hipe_flush_icache_word(address); } void hipe_patch_load_fe(Uint32 *address, Uint value) { patch_imm32(address, value); } int hipe_patch_insn(void *address, Uint32 value, Eterm type) { switch (type) { case am_closure: case am_constant: case am_atom: case am_c_const: break; default: return -1; } patch_imm32((Uint32*)address, value); return 0; } /* called from hipe_bif0.c:hipe_bifs_make_native_stub_2() and hipe_bif0.c:hipe_make_stub() */ void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity) { unsigned int *code; #if defined(__arm__) unsigned int *tramp_callemu; int callemu_offset; #endif /* * Native code calls BEAM via a stub looking as follows: * * mov r0, #beamArity * ldr r8, [pc,#0] // beamAddress * b nbif_callemu * .long beamAddress * * I'm using r0 and r8 since they aren't used for * parameter passing in native code. The branch to * nbif_callemu may need to go via a trampoline. * (Trampolines are allowed to modify r12, but they don't.) */ #if !defined(__arm__) /* verify that 'ba' can reach nbif_callemu */ if ((unsigned long)&nbif_callemu & ~0x01FFFFFCUL) abort(); #endif #if defined(__arm__) code = alloc_stub(4, &tramp_callemu); callemu_offset = ((int)&nbif_callemu - ((int)&code[2] + 8)) >> 2; if (!(callemu_offset >= -0x00800000 && callemu_offset <= 0x007FFFFF)) { callemu_offset = ((int)tramp_callemu - ((int)&code[2] + 8)) >> 2; if (!(callemu_offset >= -0x00800000 && callemu_offset <= 0x007FFFFF)) abort(); } #else code = alloc_stub(4, &trampoline); #endif #if defined(__arm__) /* mov r0, #beamArity */ code[0] = 0xE3A00000 | (beamArity & 0xFF); /* ldr r8, [pc,#0] // beamAddress */ code[1] = 0xE59F8000; /* b nbif_callemu */ code[2] = 0xEA000000 | (callemu_offset & 0x00FFFFFF); /* .long beamAddress */ code[3] = (unsigned int)beamAddress; #else /* addi r12,0,beamAddress@l */ code[0] = 0x39800000 | ((unsigned long)beamAddress & 0xFFFF); /* addi r0,0,beamArity */ code[1] = 0x38000000 | (beamArity & 0x7FFF); /* addis r12,r12,beamAddress@ha */ code[2] = 0x3D8C0000 | at_ha((unsigned long)beamAddress); /* ba nbif_callemu */ code[3] = 0x48000002 | (unsigned long)&nbif_callemu; #endif hipe_flush_icache_range(code, 4*sizeof(int)); return code; } static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA) { Uint32 oldI = *address; #if defined(__arm__) Uint32 newI = (oldI & 0xFF000000) | (offset & 0x00FFFFFF); #else Uint32 newI = (oldI & 0xFC000001) | ((offset & 0x00FFFFFF) << 2) | (AA & 2); #endif *address = newI; hipe_flush_icache_word(address); } int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) { #if !defined(__arm__) if ((Uint32)destAddress == ((Uint32)destAddress & 0x01FFFFFC)) { /* The destination is in the [0,32MB[ range. We can reach it with a ba/bla instruction. This is the typical case for BIFs and primops. It's also common for trap-to-BEAM stubs (on ppc32). */ patch_b((Uint32*)callAddress, (Uint32)destAddress >> 2, 2); } else { #endif #if defined(__arm__) Sint32 destOffset = ((Sint32)destAddress - ((Sint32)callAddress+8)) >> 2; #else Sint32 destOffset = ((Sint32)destAddress - (Sint32)callAddress) >> 2; #endif if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) { /* The destination is within a [-32MB,+32MB[ range from us. We can reach it with a b/bl instruction. This is typical for nearby Erlang code. */ patch_b((Uint32*)callAddress, destOffset, 0); } else { /* The destination is too distant for b/bl/ba/bla. Must do a b/bl to the trampoline. */ #if defined(__arm__) Sint32 trampOffset = ((Sint32)trampoline - ((Sint32)callAddress+8)) >> 2; #else Sint32 trampOffset = ((Sint32)trampoline - (Sint32)callAddress) >> 2; #endif if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) { /* Update the trampoline's address computation. (May be redundant, but we can't tell.) */ #if defined(__arm__) patch_imm32((Uint32*)trampoline+1, (Uint32)destAddress); #else patch_li((Uint32*)trampoline, (Uint32)destAddress); #endif /* Update this call site. */ patch_b((Uint32*)callAddress, trampOffset, 0); } else return -1; } #if !defined(__arm__) } #endif return 0; } void hipe_arch_print_pcb(struct hipe_process_state *p) { #define U(n,x) \ printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "") U("nra ", nra); U("narity ", narity); #undef U }