/*
* %CopyrightBegin%
*
* Copyright Ericsson AB 2005-2009. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
* compliance with the License. You should have received a copy of the
* Erlang Public License along with this software. If not, it can be
* retrieved online at http://www.erlang.org/.
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* %CopyrightEnd%
*/
/* $Id$
*/
#include <stddef.h> /* offsetof() */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "global.h"
#include "erl_binary.h"
#include <sys/mman.h>
#include "hipe_arch.h"
#include "hipe_native_bif.h" /* nbif_callemu() */
#include "hipe_bif0.h"
/* Flush dcache and invalidate icache for a range of addresses. */
void hipe_flush_icache_range(void *address, unsigned int nbytes)
{
#if defined(__ARM_EABI__)
register unsigned long beg __asm__("r0") = (unsigned long)address;
register unsigned long end __asm__("r1") = (unsigned long)address + nbytes;
register unsigned long flg __asm__("r2") = 0;
register unsigned long scno __asm__("r7") = 0xf0002;
__asm__ __volatile__("swi 0" /* sys_cacheflush() */
: "=r"(beg)
: "0"(beg), "r"(end), "r"(flg), "r"(scno));
#else
register unsigned long beg __asm__("r0") = (unsigned long)address;
register unsigned long end __asm__("r1") = (unsigned long)address + nbytes;
register unsigned long flg __asm__("r2") = 0;
__asm__ __volatile__("swi 0x9f0002" /* sys_cacheflush() */
: "=r"(beg)
: "0"(beg), "r"(end), "r"(flg));
#endif
}
void hipe_flush_icache_word(void *address)
{
hipe_flush_icache_range(address, 4);
}
/*
* Management of 32MB code segments for regular code and trampolines.
*/
#define SEGMENT_NRBYTES (32*1024*1024) /* named constant, _not_ a tunable */
static struct segment {
unsigned int *base; /* [base,base+32MB[ */
unsigned int *code_pos; /* INV: base <= code_pos <= tramp_pos */
unsigned int *tramp_pos; /* INV: tramp_pos <= base+32MB */
/* On ARM we always allocate a trampoline at base+32MB-8 for
nbif_callemu, so tramp_pos <= base+32MB-8. */
} curseg;
#define in_area(ptr,start,nbytes) \
((unsigned long)((char*)(ptr) - (char*)(start)) < (nbytes))
static void *new_code_mapping(void)
{
return mmap(0, SEGMENT_NRBYTES,
PROT_EXEC|PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0);
}
static int check_callees(Eterm callees)
{
Eterm *tuple;
Uint arity;
Uint i;
if (is_not_tuple(callees))
return -1;
tuple = tuple_val(callees);
arity = arityval(tuple[0]);
for (i = 1; i <= arity; ++i) {
Eterm mfa = tuple[i];
if (is_atom(mfa))
continue;
if (is_not_tuple(mfa) ||
tuple_val(mfa)[0] != make_arityval(3) ||
is_not_atom(tuple_val(mfa)[1]) ||
is_not_atom(tuple_val(mfa)[2]) ||
is_not_small(tuple_val(mfa)[3]) ||
unsigned_val(tuple_val(mfa)[3]) > 255)
return -1;
}
return arity;
}
static unsigned int *try_alloc(Uint nrwords, int nrcallees, Eterm callees, unsigned int **trampvec)
{
unsigned int *base, *address, *tramp_pos, nrfreewords;
int trampnr;
Eterm mfa, m, f;
unsigned int a, *trampoline;
m = NIL; f = NIL; a = 0; /* silence stupid compiler warning */
tramp_pos = curseg.tramp_pos;
address = curseg.code_pos;
nrfreewords = tramp_pos - address;
if (nrwords > nrfreewords)
return NULL;
curseg.code_pos = address + nrwords;
nrfreewords -= nrwords;
base = curseg.base;
for (trampnr = 1; trampnr <= nrcallees; ++trampnr) {
mfa = tuple_val(callees)[trampnr];
if (is_atom(mfa))
trampoline = hipe_primop_get_trampoline(mfa);
else {
m = tuple_val(mfa)[1];
f = tuple_val(mfa)[2];
a = unsigned_val(tuple_val(mfa)[3]);
trampoline = hipe_mfa_get_trampoline(m, f, a);
}
if (!in_area(trampoline, base, SEGMENT_NRBYTES)) {
if (nrfreewords < 2)
return NULL;
nrfreewords -= 2;
tramp_pos = trampoline = tramp_pos - 2;
trampoline[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */
trampoline[1] = 0; /* callee's address */
hipe_flush_icache_range(trampoline, 2*sizeof(int));
if (is_atom(mfa))
hipe_primop_set_trampoline(mfa, trampoline);
else
hipe_mfa_set_trampoline(m, f, a, trampoline);
}
trampvec[trampnr-1] = trampoline;
}
curseg.tramp_pos = tramp_pos;
return address;
}
void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
{
Uint nrwords;
int nrcallees;
Eterm trampvecbin;
unsigned int **trampvec;
unsigned int *address;
unsigned int *base;
struct segment oldseg;
if (nrbytes & 0x3)
return NULL;
nrwords = nrbytes >> 2;
nrcallees = check_callees(callees);
if (nrcallees < 0)
return NULL;
trampvecbin = new_binary(p, NULL, nrcallees*sizeof(unsigned int*));
trampvec = (unsigned int**)binary_bytes(trampvecbin);
address = try_alloc(nrwords, nrcallees, callees, trampvec);
if (!address) {
base = new_code_mapping();
if (base == MAP_FAILED)
return NULL;
oldseg = curseg;
curseg.base = base;
curseg.code_pos = base;
curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES);
#if defined(__arm__)
curseg.tramp_pos -= 2;
curseg.tramp_pos[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */
curseg.tramp_pos[1] = (unsigned int)&nbif_callemu;
#endif
address = try_alloc(nrwords, nrcallees, callees, trampvec);
if (!address) {
munmap(base, SEGMENT_NRBYTES);
curseg = oldseg;
return NULL;
}
/* commit to new segment, ignore leftover space in old segment */
}
*trampolines = trampvecbin;
return address;
}
static unsigned int *alloc_stub(Uint nrwords, unsigned int **tramp_callemu)
{
unsigned int *address;
unsigned int *base;
struct segment oldseg;
address = try_alloc(nrwords, 0, NIL, NULL);
if (!address) {
base = new_code_mapping();
if (base == MAP_FAILED)
return NULL;
oldseg = curseg;
curseg.base = base;
curseg.code_pos = base;
curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES);
#if defined(__arm__)
curseg.tramp_pos -= 2;
curseg.tramp_pos[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */
curseg.tramp_pos[1] = (unsigned int)&nbif_callemu;
#endif
address = try_alloc(nrwords, 0, NIL, NULL);
if (!address) {
munmap(base, SEGMENT_NRBYTES);
curseg = oldseg;
return NULL;
}
/* commit to new segment, ignore leftover space in old segment */
}
*tramp_callemu = (unsigned int*)((char*)curseg.base + SEGMENT_NRBYTES) - 2;
return address;
}
/*
* ARMv5's support for 32-bit immediates is effectively non-existent.
* Hence, every 32-bit immediate is stored in memory and loaded via
* a PC-relative addressing mode. Relocation entries refer to those
* data words, NOT the load instructions, so patching is trivial.
*/
static void patch_imm32(Uint32 *address, unsigned int imm32)
{
*address = imm32;
hipe_flush_icache_word(address);
}
void hipe_patch_load_fe(Uint32 *address, Uint value)
{
patch_imm32(address, value);
}
int hipe_patch_insn(void *address, Uint32 value, Eterm type)
{
switch (type) {
case am_closure:
case am_constant:
case am_atom:
case am_c_const:
break;
default:
return -1;
}
patch_imm32((Uint32*)address, value);
return 0;
}
/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2()
and hipe_bif0.c:hipe_make_stub() */
void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
{
unsigned int *code;
#if defined(__arm__)
unsigned int *tramp_callemu;
int callemu_offset;
#endif
/*
* Native code calls BEAM via a stub looking as follows:
*
* mov r0, #beamArity
* ldr r8, [pc,#0] // beamAddress
* b nbif_callemu
* .long beamAddress
*
* I'm using r0 and r8 since they aren't used for
* parameter passing in native code. The branch to
* nbif_callemu may need to go via a trampoline.
* (Trampolines are allowed to modify r12, but they don't.)
*/
#if !defined(__arm__)
/* verify that 'ba' can reach nbif_callemu */
if ((unsigned long)&nbif_callemu & ~0x01FFFFFCUL)
abort();
#endif
#if defined(__arm__)
code = alloc_stub(4, &tramp_callemu);
callemu_offset = ((int)&nbif_callemu - ((int)&code[2] + 8)) >> 2;
if (!(callemu_offset >= -0x00800000 && callemu_offset <= 0x007FFFFF)) {
callemu_offset = ((int)tramp_callemu - ((int)&code[2] + 8)) >> 2;
if (!(callemu_offset >= -0x00800000 && callemu_offset <= 0x007FFFFF))
abort();
}
#else
code = alloc_stub(4, &trampoline);
#endif
#if defined(__arm__)
/* mov r0, #beamArity */
code[0] = 0xE3A00000 | (beamArity & 0xFF);
/* ldr r8, [pc,#0] // beamAddress */
code[1] = 0xE59F8000;
/* b nbif_callemu */
code[2] = 0xEA000000 | (callemu_offset & 0x00FFFFFF);
/* .long beamAddress */
code[3] = (unsigned int)beamAddress;
#else
/* addi r12,0,beamAddress@l */
code[0] = 0x39800000 | ((unsigned long)beamAddress & 0xFFFF);
/* addi r0,0,beamArity */
code[1] = 0x38000000 | (beamArity & 0x7FFF);
/* addis r12,r12,beamAddress@ha */
code[2] = 0x3D8C0000 | at_ha((unsigned long)beamAddress);
/* ba nbif_callemu */
code[3] = 0x48000002 | (unsigned long)&nbif_callemu;
#endif
hipe_flush_icache_range(code, 4*sizeof(int));
return code;
}
static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA)
{
Uint32 oldI = *address;
#if defined(__arm__)
Uint32 newI = (oldI & 0xFF000000) | (offset & 0x00FFFFFF);
#else
Uint32 newI = (oldI & 0xFC000001) | ((offset & 0x00FFFFFF) << 2) | (AA & 2);
#endif
*address = newI;
hipe_flush_icache_word(address);
}
int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
{
#if !defined(__arm__)
if ((Uint32)destAddress == ((Uint32)destAddress & 0x01FFFFFC)) {
/* The destination is in the [0,32MB[ range.
We can reach it with a ba/bla instruction.
This is the typical case for BIFs and primops.
It's also common for trap-to-BEAM stubs (on ppc32). */
patch_b((Uint32*)callAddress, (Uint32)destAddress >> 2, 2);
} else {
#endif
#if defined(__arm__)
Sint32 destOffset = ((Sint32)destAddress - ((Sint32)callAddress+8)) >> 2;
#else
Sint32 destOffset = ((Sint32)destAddress - (Sint32)callAddress) >> 2;
#endif
if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) {
/* The destination is within a [-32MB,+32MB[ range from us.
We can reach it with a b/bl instruction.
This is typical for nearby Erlang code. */
patch_b((Uint32*)callAddress, destOffset, 0);
} else {
/* The destination is too distant for b/bl/ba/bla.
Must do a b/bl to the trampoline. */
#if defined(__arm__)
Sint32 trampOffset = ((Sint32)trampoline - ((Sint32)callAddress+8)) >> 2;
#else
Sint32 trampOffset = ((Sint32)trampoline - (Sint32)callAddress) >> 2;
#endif
if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) {
/* Update the trampoline's address computation.
(May be redundant, but we can't tell.) */
#if defined(__arm__)
patch_imm32((Uint32*)trampoline+1, (Uint32)destAddress);
#else
patch_li((Uint32*)trampoline, (Uint32)destAddress);
#endif
/* Update this call site. */
patch_b((Uint32*)callAddress, trampOffset, 0);
} else
return -1;
}
#if !defined(__arm__)
}
#endif
return 0;
}
void hipe_arch_print_pcb(struct hipe_process_state *p)
{
#define U(n,x) \
printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "")
U("nra ", nra);
U("narity ", narity);
#undef U
}