/*
* %CopyrightBegin%
*
* Copyright Ericsson AB 2003-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
* compliance with the License. You should have received a copy of the
* Erlang Public License along with this software. If not, it can be
* retrieved online at http://www.erlang.org/.
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* %CopyrightEnd%
*/
#include <stddef.h> /* offsetof() */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "global.h"
#include <sys/mman.h>
#include "hipe_arch.h"
#include "hipe_native_bif.h" /* nbif_callemu() */
#undef F_TIMO
#undef THE_NON_VALUE
#undef ERL_FUN_SIZE
#include "hipe_literals.h"
void hipe_patch_load_fe(Uint32 *address, Uint32 value)
{
/* address points to a disp32 or imm32 operand */
*address = value;
}
int hipe_patch_insn(void *address, Uint32 value, Eterm type)
{
switch (type) {
case am_closure:
case am_constant:
case am_atom:
case am_c_const:
break;
case am_x86_abs_pcrel:
value += (Uint)address;
break;
default:
return -1;
}
*(Uint32*)address = value;
return 0;
}
int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
{
Uint rel32;
if (trampoline)
return -1;
rel32 = (Uint)destAddress - (Uint)callAddress - 4;
*(Uint32*)callAddress = rel32;
hipe_flush_icache_word(callAddress);
return 0;
}
/*
* Memory allocator for executable code.
*
* This is required on x86 because some combinations
* of Linux kernels and CPU generations default to
* non-executable memory mappings, causing ordinary
* malloc() memory to be non-executable.
*/
static unsigned int code_bytes;
static char *code_next;
#if 0 /* change to non-zero to get allocation statistics at exit() */
static unsigned int total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs, nr_large, total_lost;
static unsigned int atexit_done;
static void alloc_code_stats(void)
{
printf("\r\nalloc_code_stats: %u bytes mapped, %u joins, %u splits, %u bytes allocated, %u average alloc, %u large allocs, %u bytes lost\r\n",
total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs ? total_alloc/nr_allocs : 0, nr_large, total_lost);
}
static void atexit_alloc_code_stats(void)
{
if (!atexit_done) {
atexit_done = 1;
(void)atexit(alloc_code_stats);
}
}
#define ALLOC_CODE_STATS(X) do{X;}while(0)
#else
#define ALLOC_CODE_STATS(X) do{}while(0)
#endif
/* FreeBSD 6.1 and Darwin breakage */
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
#endif
static void morecore(unsigned int alloc_bytes)
{
unsigned int map_bytes;
char *map_hint, *map_start;
/* Page-align the amount to allocate. */
map_bytes = (alloc_bytes + 4095) & ~4095;
/* Round up small allocations. */
if (map_bytes < 1024*1024)
map_bytes = 1024*1024;
else
ALLOC_CODE_STATS(++nr_large);
/* Create a new memory mapping, ensuring it is executable
and in the low 2GB of the address space. Also attempt
to make it adjacent to the previous mapping. */
map_hint = code_next + code_bytes;
if ((unsigned long)map_hint & 4095)
abort();
map_start = mmap(map_hint, map_bytes,
PROT_EXEC|PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS
#ifdef __x86_64__
|MAP_32BIT
#endif
,
-1, 0);
if (map_start == MAP_FAILED) {
perror("mmap");
abort();
}
ALLOC_CODE_STATS(total_mapped += map_bytes);
/* Merge adjacent mappings, so the trailing portion of the previous
mapping isn't lost. In practice this is quite successful. */
if (map_start == map_hint) {
ALLOC_CODE_STATS(++nr_joins);
code_bytes += map_bytes;
} else {
ALLOC_CODE_STATS(++nr_splits);
ALLOC_CODE_STATS(total_lost += code_bytes);
code_next = map_start;
code_bytes = map_bytes;
}
ALLOC_CODE_STATS(atexit_alloc_code_stats());
}
static void *alloc_code(unsigned int alloc_bytes)
{
void *res;
/* Align function entries. */
alloc_bytes = (alloc_bytes + 3) & ~3;
if (code_bytes < alloc_bytes)
morecore(alloc_bytes);
ALLOC_CODE_STATS(++nr_allocs);
ALLOC_CODE_STATS(total_alloc += alloc_bytes);
res = code_next;
code_next += alloc_bytes;
code_bytes -= alloc_bytes;
return res;
}
void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
{
if (is_not_nil(callees))
return NULL;
*trampolines = NIL;
return alloc_code(nrbytes);
}
/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2()
and hipe_bif0.c:hipe_make_stub() */
void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
{
/*
* This creates a native code stub with the following contents:
*
* movl $Address, P_BEAM_IP(%ebp)
* movb $Arity, P_ARITY(%ebp)
* jmp callemu
*
* The stub has variable size, depending on whether the P_BEAM_IP
* and P_ARITY offsets fit in 8-bit signed displacements or not.
* The rel32 offset in the final jmp depends on its actual location,
* which also depends on the size of the previous instructions.
* Arity is stored with a movb because (a) Bj�rn tells me arities
* are <= 255, and (b) a movb is smaller and faster than a movl.
*/
unsigned int codeSize;
unsigned char *code, *codep;
unsigned int callEmuOffset;
codeSize = /* 16, 19, or 22 bytes */
16 + /* 16 when both offsets are 8-bit */
(P_BEAM_IP >= 128 ? 3 : 0) +
(P_ARITY >= 128 ? 3 : 0);
codep = code = alloc_code(codeSize);
/* movl $beamAddress, P_BEAM_IP(%ebp); 3 or 6 bytes, plus 4 */
codep[0] = 0xc7;
#if P_BEAM_IP >= 128
codep[1] = 0x85; /* disp32[EBP] */
codep[2] = P_BEAM_IP & 0xFF;
codep[3] = (P_BEAM_IP >> 8) & 0xFF;
codep[4] = (P_BEAM_IP >> 16) & 0xFF;
codep[5] = (P_BEAM_IP >> 24) & 0xFF;
codep += 6;
#else
codep[1] = 0x45; /* disp8[EBP] */
codep[2] = P_BEAM_IP;
codep += 3;
#endif
codep[0] = ((unsigned int)beamAddress) & 0xFF;
codep[1] = ((unsigned int)beamAddress >> 8) & 0xFF;
codep[2] = ((unsigned int)beamAddress >> 16) & 0xFF;
codep[3] = ((unsigned int)beamAddress >> 24) & 0xFF;
codep += 4;
/* movb $beamArity, P_ARITY(%ebp); 3 or 6 bytes */
codep[0] = 0xc6;
#if P_ARITY >= 128
codep[1] = 0x85; /* disp32[EBP] */
codep[2] = P_ARITY & 0xFF;
codep[3] = (P_ARITY >> 8) & 0xFF;
codep[4] = (P_ARITY >> 16) & 0xFF;
codep[5] = (P_ARITY >> 24) & 0xFF;
codep += 6;
#else
codep[1] = 0x45; /* disp8[EBP] */
codep[2] = P_ARITY;
codep += 3;
#endif
codep[0] = beamArity;
codep += 1;
/* jmp callemu; 5 bytes */
callEmuOffset = (unsigned char*)nbif_callemu - (code + codeSize);
codep[0] = 0xe9;
codep[1] = callEmuOffset & 0xFF;
codep[2] = (callEmuOffset >> 8) & 0xFF;
codep[3] = (callEmuOffset >> 16) & 0xFF;
codep[4] = (callEmuOffset >> 24) & 0xFF;
codep += 5;
ASSERT(codep == code + codeSize);
/* I-cache flush? */
return code;
}
void hipe_arch_print_pcb(struct hipe_process_state *p)
{
#define U(n,x) \
printf(" % 4d | %s | 0x%08x | |\r\n", (int)offsetof(struct hipe_process_state,x), n, (unsigned)p->x)
U("ncsp ", ncsp);
U("narity ", narity);
#undef U
}