/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2005-2016. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

#ifdef __arm__

#include <stddef.h> /* offsetof() */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "global.h"
#include "erl_binary.h"

#include "hipe_arch.h"
#include "hipe_native_bif.h" /* nbif_callemu() */
#include "hipe_bif0.h"

#ifndef __has_builtin
# define __has_builtin(x) 0
#endif

/*
 * Flush dcache and invalidate icache for a range of addresses.
 *
 * address: first byte of the range.
 * nbytes:  length of the range in bytes.
 *
 * Uses the compiler builtin when available, otherwise the clang runtime
 * helper, otherwise a raw Linux sys_cacheflush() software interrupt.
 */
void hipe_flush_icache_range(void *address, unsigned int nbytes)
{
    void* end = (char*)address + nbytes;

#if ERTS_AT_LEAST_GCC_VSN__(4, 3, 0) || __has_builtin(__builtin___clear_cache)
    __builtin___clear_cache(address, end);
#elif defined(__clang__)
    void __clear_cache(void *start, void *end);
    __clear_cache(address, end);
#elif defined(__linux__)
# if defined(__ARM_EABI__)
    /*
     * The register variable holding the end address must not be named
     * `end': that would redeclare the local above in the same scope and
     * initialize itself from itself.
     */
    register unsigned long beg __asm__("r0") = (unsigned long)address;
    register unsigned long lim __asm__("r1") = (unsigned long)end;
    register unsigned long flg __asm__("r2") = 0;
    register unsigned long scno __asm__("r7") = 0xf0002;
    __asm__ __volatile__("swi 0" /* sys_cacheflush() */
                         : "=r"(beg)
                         : "0"(beg), "r"(lim), "r"(flg), "r"(scno));
# else
    register unsigned long beg __asm__("r0") = (unsigned long)address;
    register unsigned long lim __asm__("r1") = (unsigned long)end;
    register unsigned long flg __asm__("r2") = 0;
    __asm__ __volatile__("swi 0x9f0002" /* sys_cacheflush() */
                         : "=r"(beg)
                         : "0"(beg), "r"(lim), "r"(flg));
# endif
#else
# error "Don't know how to flush instruction cache"
#endif
}

/* Flush the caches for the single 32-bit word at `address'. */
void hipe_flush_icache_word(void *address)
{
    hipe_flush_icache_range(address, sizeof(Uint32));
}

/*
 * Validate the `callees' term.
 *
 * It must be a tuple whose elements are each either an atom or a
 * 3-tuple {atom, atom, small} where the small's unsigned value is
 * at most 255.
 *
 * Returns the tuple's arity on success, or -1 if the term is malformed.
 */
static int check_callees(Eterm callees)
{
    Eterm *tuple;
    Uint arity;
    Uint i;

    if (is_not_tuple(callees))
        return -1;
    tuple = tuple_val(callees);
    arity = arityval(tuple[0]);
    for (i = 1; i <= arity; ++i) {
        Eterm mfa = tuple[i];
        if (is_atom(mfa))
            continue;
        if (is_not_tuple(mfa) ||
            tuple_val(mfa)[0] != make_arityval(3) ||
            is_not_atom(tuple_val(mfa)[1]) ||
            is_not_atom(tuple_val(mfa)[2]) ||
            is_not_small(tuple_val(mfa)[3]) ||
            unsigned_val(tuple_val(mfa)[3]) > 255)
            return -1;
    }
    return arity;
}

/* Number of 32-bit words occupied by one trampoline. */
#define TRAMPOLINE_WORDS 2

/*
 * Emit `nrcallees' trampolines starting at `address' and record a
 * pointer to each one in trampvec[0..nrcallees-1].
 *
 * Each trampoline is a load-to-pc instruction followed by a data word
 * that is initialized to 0 here and later patched with the callee's
 * address (see hipe_patch_call()). The `callees' argument is not
 * consulted by this function.
 */
static void generate_trampolines(Uint32* address,
                                 int nrcallees, Eterm callees,
                                 Uint32** trampvec)
{
    Uint32* trampoline = address;
    int i;

    for (i = 0; i < nrcallees; ++i) {
        trampoline[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */
        trampoline[1] = 0;          /* callee's address */
        trampvec[i] = trampoline;
        trampoline += TRAMPOLINE_WORDS;
    }
    hipe_flush_icache_range(address,
                            nrcallees*TRAMPOLINE_WORDS*sizeof(Uint32));
}

/*
 * Allocate executable memory for `nrbytes' of native code followed by
 * one trampoline per entry in `callees'.
 *
 * nrbytes:     code size in bytes; must be a multiple of 4.
 * callees:     tuple validated by check_callees().
 * trampolines: out parameter; receives a binary holding one
 *              trampoline pointer per callee.
 * p:           process on whose behalf the binary is created.
 *
 * Returns the start of the code area, or NULL if `nrbytes' is not
 * word-aligned, `callees' is malformed, or the allocation yields NULL.
 */
void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
{
    Uint code_words;
    int nrcallees;
    Eterm trampvecbin;
    Uint32 **trampvec;
    Uint32 *address;

    if (nrbytes & 0x3)
        return NULL;
    code_words = nrbytes / sizeof(Uint32);

    nrcallees = check_callees(callees);
    if (nrcallees < 0)
        return NULL;
    trampvecbin = new_binary(p, NULL, nrcallees*sizeof(Uint32*));
    trampvec = (Uint32**)binary_bytes(trampvecbin);

    address = erts_alloc(ERTS_ALC_T_HIPE_EXEC,
                         (code_words + nrcallees*TRAMPOLINE_WORDS)*sizeof(Uint32));
    /* Same defensive check as hipe_make_native_stub() performs. */
    if (!address)
        return NULL;

    /* The trampolines live directly after the code words. */
    generate_trampolines(address + code_words, nrcallees, callees, trampvec);
    *trampolines = trampvecbin;
    return address;
}

/*
 * Release a code area obtained from hipe_alloc_code().
 * The `bytes' argument is not used.
 */
void hipe_free_code(void* code, unsigned int bytes)
{
    erts_free(ERTS_ALC_T_HIPE_EXEC, code);
}

/*
 * ARMv5's support for 32-bit immediates is effectively non-existent.
 * Hence, every 32-bit immediate is stored in memory and loaded via
 * a PC-relative addressing mode. Relocation entries refer to those
 * data words, NOT the load instructions, so patching is trivial.
 */
static void patch_imm32(Uint32 *address, unsigned int imm32)
{
    *address = imm32;
    hipe_flush_icache_word(address);
}

/* Store `value' in the data word at `address' and flush it. */
void hipe_patch_load_fe(Uint32 *address, Uint value)
{
    patch_imm32(address, value);
}

/*
 * Patch the 32-bit data word at `address' with `value'.
 *
 * Only the relocation types am_closure, am_constant, am_atom and
 * am_c_const are accepted. Returns 0 on success and -1 (without
 * writing anything) for any other `type'.
 */
int hipe_patch_insn(void *address, Uint32 value, Eterm type)
{
    switch (type) {
      case am_closure:
      case am_constant:
      case am_atom:
      case am_c_const:
        break;
      default:
        return -1;
    }
    patch_imm32((Uint32*)address, value);
    return 0;
}

/*
 * Make stub for native code calling exported beam function.
 *
 * callee_exp: value stored in the stub and loaded into r8.
 * beamArity:  arity loaded into r0; only the low 8 bits are encoded.
 *
 * Returns the freshly allocated stub, or NULL if allocation yields NULL.
 */
void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity)
{
    unsigned int *code;
    int callemu_offset;
    int is_short_jmp;

    /*
     * Native code calls BEAM via a stub looking as follows:
     *
     * mov r0, #beamArity
     * ldr r8, [pc,#0] // callee_exp
     * b nbif_callemu
     * .long callee_exp
     *
     * or if nbif_callemu is too far away:
     *
     * mov r0, #beamArity
     * ldr r8, [pc,#0] // callee_exp
     * ldr pc, [pc,#0] // nbif_callemu
     * .long callee_exp
     * .long nbif_callemu
     *
     * I'm using r0 and r8 since they aren't used for
     * parameter passing in native code.
     */

    code = erts_alloc(ERTS_ALC_T_HIPE_EXEC, 5*sizeof(Uint32));
    if (!code)
        return NULL;

    /* Word offset from the branch at code[2] (pc reads as insn+8). */
    callemu_offset = ((int)&nbif_callemu - ((int)&code[2] + 8)) >> 2;
    /* A b instruction carries a signed 24-bit word offset. */
    is_short_jmp = (callemu_offset >= -0x00800000 &&
                    callemu_offset <= 0x007FFFFF);
#ifdef DEBUG
    /* Force the long form for some offsets (presumably so that path
       gets exercised in debug builds). */
    if (is_short_jmp && (callemu_offset % 3)==0) {
        is_short_jmp = 0;
    }
#endif

    /* mov r0, #beamArity */
    code[0] = 0xE3A00000 | (beamArity & 0xFF);
    /* ldr r8, [pc,#0] // callee_exp */
    code[1] = 0xE59F8000;
    if (is_short_jmp) {
        /* b nbif_callemu */
        code[2] = 0xEA000000 | (callemu_offset & 0x00FFFFFF);
    }
    else {
        /* ldr pc, [pc,#0] // nbif_callemu */
        code[2] = 0xE59FF000;
        /* .long nbif_callemu */
        code[4] = (unsigned int)&nbif_callemu;
    }
    /* .long callee_exp */
    code[3] = (unsigned int)callee_exp;

    hipe_flush_icache_range(code, 5*sizeof(Uint32));

    return code;
}

/* Release a stub obtained from hipe_make_native_stub(). */
void hipe_free_native_stub(void* stub)
{
    erts_free(ERTS_ALC_T_HIPE_EXEC, stub);
}

/*
 * Rewrite the 24-bit offset field of the branch instruction at
 * `address', keeping its top 8 bits, then flush the word.
 * The `AA' argument is not used.
 */
static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA)
{
    Uint32 oldI = *address;
    Uint32 newI = (oldI & 0xFF000000) | (offset & 0x00FFFFFF);
    *address = newI;
    hipe_flush_icache_word(address);
}

/*
 * Point the b/bl instruction at `callAddress' to `destAddress'.
 *
 * If the destination is out of direct branch range, the call is routed
 * through `trampoline', whose data word is set to `destAddress'.
 *
 * Returns 0 on success, or -1 if neither the destination nor the
 * trampoline is within branch range of the call site.
 */
int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
{
    Sint32 destOffset = ((Sint32)destAddress - ((Sint32)callAddress+8)) >> 2;
    if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) {
        /* The destination is within a [-32MB,+32MB[ range from us.
           We can reach it with a b/bl instruction.
           This is typical for nearby Erlang code. */
        patch_b((Uint32*)callAddress, destOffset, 0);
    } else {
        /* The destination is too distant for b/bl.
           Must do a b/bl to the trampoline. */
        Sint32 trampOffset = ((Sint32)trampoline - ((Sint32)callAddress+8)) >> 2;
        if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) {
            /* Update the trampoline's address computation.
               (May be redundant, but we can't tell.) */
            patch_imm32((Uint32*)trampoline+1, (Uint32)destAddress);
            /* Update this call site. */
            patch_b((Uint32*)callAddress, trampOffset, 0);
        } else
            return -1;
    }
    return 0;
}

/*
 * Print the ARM-specific fields (nra, narity) of a HiPE process state:
 * one row per field with its struct offset, label and value in hex.
 */
void hipe_arch_print_pcb(struct hipe_process_state *p)
{
#define U(n,x) \
    printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "")
    U("nra ", nra);
    U("narity ", narity);
#undef U
}

#endif /*__arm__*/