aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/hipe/hipe_ppc.c
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
committerErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
commit84adefa331c4159d432d22840663c38f155cd4c1 (patch)
treebff9a9c66adda4df2106dfd0e5c053ab182a12bd /erts/emulator/hipe/hipe_ppc.c
downloadotp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release.OTP_R13B03
Diffstat (limited to 'erts/emulator/hipe/hipe_ppc.c')
-rw-r--r--erts/emulator/hipe/hipe_ppc.c487
1 files changed, 487 insertions, 0 deletions
diff --git a/erts/emulator/hipe/hipe_ppc.c b/erts/emulator/hipe/hipe_ppc.c
new file mode 100644
index 0000000000..3a0beedb68
--- /dev/null
+++ b/erts/emulator/hipe/hipe_ppc.c
@@ -0,0 +1,487 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2004-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+/* $Id$
+ */
+#include <stddef.h> /* offsetof() */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "global.h"
+#include "erl_binary.h"
+#include <sys/mman.h>
+
+#include "hipe_arch.h"
+#include "hipe_native_bif.h" /* nbif_callemu() */
+#include "hipe_bif0.h"
+
+#if !defined(__powerpc64__)
+/* Bit pattern of the IEEE-754 double 0x4330000080000000 (== 2^52 + 2^31),
+   in big-endian word order.  NOTE(review): presumably used by compiled
+   32-bit code for the classic integer-to-double conversion trick
+   (bias by 2^52+2^31, then subtract) -- confirm against the compiler. */
+const unsigned int fconv_constant[2] = { 0x43300000, 0x80000000 };
+#endif
+
+/*
+ * Flush the data cache and invalidate the instruction cache for a
+ * range of addresses.  PowerPC does not keep the I- and D-caches
+ * coherent, so freshly written machine code must be pushed to memory
+ * with 'dcbst' and the stale icache lines discarded with 'icbi'
+ * before that code may be executed.
+ */
+void hipe_flush_icache_range(void *address, unsigned int nbytes)
+{
+    /* NOTE(review): assumes 32-byte L1 cache lines.  On CPUs with
+       larger lines the extra per-32-byte ops are redundant but
+       harmless, since each op affects the whole containing line. */
+    const unsigned int L1_CACHE_SHIFT = 5;
+    const unsigned long L1_CACHE_BYTES = 1 << L1_CACHE_SHIFT;
+    unsigned long start, p;
+    unsigned int nlines, n;
+
+    if (!nbytes)
+	return;
+
+    /* Round the start down to a line boundary, and count how many
+       lines the (possibly misaligned) range covers. */
+    start = (unsigned long)address & ~(L1_CACHE_BYTES-1);
+    nlines =
+	(((unsigned long)address & (L1_CACHE_BYTES-1))
+	 + nbytes
+	 + (L1_CACHE_BYTES-1)) >> L1_CACHE_SHIFT;
+
+    /* Pass 1: write every dirty data-cache line in the range back to memory. */
+    p = start;
+    n = nlines;
+    do {
+	asm volatile("dcbst 0,%0" : : "r"(p) : "memory");
+	p += L1_CACHE_BYTES;
+    } while (--n != 0);
+    asm volatile("sync");	/* order the dcbst stores before the icbi pass */
+    /* Pass 2: invalidate the corresponding instruction-cache lines. */
+    p = start;
+    n = nlines;
+    do {
+	asm volatile("icbi 0,%0" : : "r"(p) : "memory");
+	p += L1_CACHE_BYTES;
+    } while (--n != 0);
+    asm volatile("sync\n\tisync");	/* discard any prefetched instructions */
+}
+
+/*
+ * Management of 32MB code segments for regular code and trampolines.
+ */
+
+#define SEGMENT_NRBYTES (32*1024*1024) /* named constant, _not_ a tunable */
+
+/* The current (most recently mapped) code segment.  Regular code is
+   allocated upward from 'base' and trampolines downward from the
+   segment end, so the free space is always [code_pos,tramp_pos[. */
+static struct segment {
+    unsigned int *base; /* [base,base+32MB[ */
+    unsigned int *code_pos; /* INV: base <= code_pos <= tramp_pos */
+    unsigned int *tramp_pos; /* INV: tramp_pos <= base+32MB */
+} curseg;
+
+/* True iff 'ptr' lies within the 'nbytes'-byte region starting at
+   'start'; the unsigned subtraction makes one comparison suffice. */
+#define in_area(ptr,start,nbytes) \
+    ((unsigned long)((char*)(ptr) - (char*)(start)) < (nbytes))
+
+/* Darwin breakage */
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#if defined(__powerpc64__)
+/* Map a fresh 32MB executable segment.  On ppc64 the segment must end
+   below 2GB so that code addresses fit the 31-bit li31 sequences and
+   absolute branches; mmap failure to honour that aborts the emulator.
+   Returns the mapping or MAP_FAILED. */
+static void *new_code_mapping(void)
+{
+    char *map_hint, *map_start;
+
+    /*
+     * Allocate a new 32MB code segment in the low 2GB of the address space.
+     *
+     * This is problematic for several reasons:
+     * - Linux/ppc64 lacks the MAP_32BIT flag that Linux/x86-64 has.
+     * - The address space hint to mmap is only respected if that
+     *   area is available. If it isn't, then mmap falls back to its
+     *   defaults, which (according to testing) results in very high
+     *   (and thus useless for us) addresses being returned.
+     * - Another mapping, presumably the brk, also occupies low addresses.
+     *
+     * As initial implementation, simply start allocating at the 0.5GB
+     * boundary. This leaves plenty of space for the brk before malloc
+     * needs to switch to mmap, while allowing for 1.5GB of code.
+     *
+     * A more robust implementation would be to parse /proc/self/maps,
+     * reserve all available space between (say) 0.5GB and 2GB with
+     * PROT_NONE MAP_NORESERVE mappings, and then allocate by releasing
+     * 32MB segments and re-mapping them properly. This would work on
+     * Linux/ppc64, I have no idea how things should be done on Darwin64.
+     */
+    if (curseg.base)
+	map_hint = (char*)curseg.base + SEGMENT_NRBYTES;	/* try just above the previous segment */
+    else
+	map_hint = (char*)(512*1024*1024); /* 0.5GB */
+    map_start = mmap(map_hint, SEGMENT_NRBYTES,
+		     PROT_EXEC|PROT_READ|PROT_WRITE,
+		     MAP_PRIVATE|MAP_ANONYMOUS,
+		     -1, 0);
+    /* Abort if the mapping's last byte is at or above 2GB. */
+    if (map_start != MAP_FAILED &&
+	(((unsigned long)map_start + (SEGMENT_NRBYTES-1)) & ~0x7FFFFFFFUL)) {
+	fprintf(stderr, "mmap with hint %p returned code memory %p\r\n", map_hint, map_start);
+	abort();
+    }
+    return map_start;
+}
+#else
+/* On 32-bit PowerPC any address works: let the kernel choose.
+   Returns the mapping or MAP_FAILED. */
+static void *new_code_mapping(void)
+{
+    return mmap(0, SEGMENT_NRBYTES,
+		PROT_EXEC|PROT_READ|PROT_WRITE,
+		MAP_PRIVATE|MAP_ANONYMOUS,
+		-1, 0);
+}
+#endif
+
+static int check_callees(Eterm callees)
+{
+ Eterm *tuple;
+ Uint arity;
+ Uint i;
+
+ if (is_not_tuple(callees))
+ return -1;
+ tuple = tuple_val(callees);
+ arity = arityval(tuple[0]);
+ for (i = 1; i <= arity; ++i) {
+ Eterm mfa = tuple[i];
+ if (is_not_tuple(mfa) ||
+ tuple_val(mfa)[0] != make_arityval(3) ||
+ is_not_atom(tuple_val(mfa)[1]) ||
+ is_not_atom(tuple_val(mfa)[2]) ||
+ is_not_small(tuple_val(mfa)[3]) ||
+ unsigned_val(tuple_val(mfa)[3]) > 255)
+ return -1;
+ }
+ return arity;
+}
+
+/*
+ * Try to allocate 'nrwords' words of code space in the current 32MB
+ * segment, plus a 4-word trampoline for each of the 'nrcallees' MFAs
+ * in the 'callees' tuple that does not already have one inside this
+ * segment.  Code grows upward from the segment base and trampolines
+ * downward from the segment end; allocation fails when they would meet.
+ *
+ * On success returns the code address and fills trampvec[0..nrcallees-1]
+ * with each callee's trampoline address; returns NULL when out of space.
+ *
+ * NOTE(review): curseg.code_pos is advanced before the trampolines are
+ * allocated, so a NULL return for lack of trampoline space leaves those
+ * code words consumed.  The callers then abandon the whole segment, so
+ * nothing is permanently lost -- but verify this before reusing the
+ * helper in a new context.
+ */
+static unsigned int *try_alloc(Uint nrwords, int nrcallees, Eterm callees, unsigned int **trampvec)
+{
+    unsigned int *base, *address, *tramp_pos, nrfreewords;
+    int trampnr;
+
+    tramp_pos = curseg.tramp_pos;
+    address = curseg.code_pos;
+    nrfreewords = tramp_pos - address;
+    if (nrwords > nrfreewords)
+	return NULL;
+    curseg.code_pos = address + nrwords;
+    nrfreewords -= nrwords;
+
+    base = curseg.base;
+    for (trampnr = 1; trampnr <= nrcallees; ++trampnr) {
+	Eterm mfa = tuple_val(callees)[trampnr];
+	Eterm m = tuple_val(mfa)[1];
+	Eterm f = tuple_val(mfa)[2];
+	unsigned int a = unsigned_val(tuple_val(mfa)[3]);
+	unsigned int *trampoline = hipe_mfa_get_trampoline(m, f, a);
+	if (!in_area(trampoline, base, SEGMENT_NRBYTES)) {
+	    /* No trampoline for this MFA in the current segment:
+	       carve a new 4-word one off the top of the free space.
+	       The immediate fields are zero for now; hipe_patch_call()
+	       fills in the real target address later. */
+	    if (nrfreewords < 4)
+		return NULL;
+	    nrfreewords -= 4;
+	    tramp_pos = trampoline = tramp_pos - 4;
+#if defined(__powerpc64__)
+	    trampoline[0] = 0x3D600000; /* addis r11,0,0 */
+	    trampoline[1] = 0x616B0000; /* ori r11,r11,0 */
+#else
+	    trampoline[0] = 0x39600000; /* addi r11,r0,0 */
+	    trampoline[1] = 0x3D6B0000; /* addis r11,r11,0 */
+#endif
+	    trampoline[2] = 0x7D6903A6; /* mtctr r11 */
+	    trampoline[3] = 0x4E800420; /* bctr */
+	    hipe_flush_icache_range(trampoline, 4*sizeof(int));
+	    hipe_mfa_set_trampoline(m, f, a, trampoline);
+	}
+	trampvec[trampnr-1] = trampoline;
+    }
+    curseg.tramp_pos = tramp_pos;
+    return address;
+}
+
+/*
+ * Allocate 'nrbytes' (must be word-aligned) of native code space plus
+ * trampolines for the MFAs listed in the 'callees' tuple.
+ *
+ * On success the trampoline-address vector is stored as a binary in
+ * *trampolines (allocated on process p's heap) and the code address is
+ * returned.  Returns NULL on misaligned size, a malformed callees
+ * tuple, or exhausted code memory.
+ */
+void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
+{
+    Uint nrwords;
+    int nrcallees;
+    Eterm trampvecbin;
+    unsigned int **trampvec;
+    unsigned int *address;
+    unsigned int *base;
+    struct segment oldseg;
+
+    if (nrbytes & 0x3)
+	return NULL;
+    nrwords = nrbytes >> 2;
+
+    nrcallees = check_callees(callees);
+    if (nrcallees < 0)
+	return NULL;
+    trampvecbin = new_binary(p, NULL, nrcallees*sizeof(unsigned int*));
+    trampvec = (unsigned int**)binary_bytes(trampvecbin);
+
+    address = try_alloc(nrwords, nrcallees, callees, trampvec);
+    if (!address) {
+	/* Current segment exhausted: map a fresh 32MB segment and
+	   retry; restore the old segment if that fails too. */
+	base = new_code_mapping();
+	if (base == MAP_FAILED)
+	    return NULL;
+	oldseg = curseg;
+	curseg.base = base;
+	curseg.code_pos = base;
+	curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES);
+
+	address = try_alloc(nrwords, nrcallees, callees, trampvec);
+	if (!address) {
+	    munmap(base, SEGMENT_NRBYTES);
+	    curseg = oldseg;
+	    return NULL;
+	}
+	/* commit to new segment, ignore leftover space in old segment */
+    }
+    *trampolines = trampvecbin;
+    return address;
+}
+
+static unsigned int *alloc_stub(Uint nrwords)
+{
+ unsigned int *address;
+ unsigned int *base;
+ struct segment oldseg;
+
+ address = try_alloc(nrwords, 0, NIL, NULL);
+ if (!address) {
+ base = new_code_mapping();
+ if (base == MAP_FAILED)
+ return NULL;
+ oldseg = curseg;
+ curseg.base = base;
+ curseg.code_pos = base;
+ curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES);
+
+ address = try_alloc(nrwords, 0, NIL, NULL);
+ if (!address) {
+ munmap(base, SEGMENT_NRBYTES);
+ curseg = oldseg;
+ return NULL;
+ }
+ /* commit to new segment, ignore leftover space in old segment */
+ }
+ return address;
+}
+
+static void patch_imm16(Uint32 *address, unsigned int imm16)
+{
+ unsigned int insn = *address;
+ *address = (insn & ~0xFFFF) | (imm16 & 0xFFFF);
+ hipe_flush_icache_word(address);
+}
+
+#if defined(__powerpc64__)
+/*
+ * Patch a 5-instruction 64-bit immediate load with 'value'.
+ * The sequence is addis/ori/sldi/oris/ori; the sldi at address+2
+ * has no immediate field and is left untouched.
+ */
+static void patch_li64(Uint32 *address, Uint64 value)
+{
+    patch_imm16(address+0, value >> 48);/* addis r,0,value@highest */
+    patch_imm16(address+1, value >> 32);/* ori r,r,value@higher */
+    /* sldi r,r,32 */
+    patch_imm16(address+3, value >> 16);/* oris r,r,value@h */
+    patch_imm16(address+4, value); /* ori r,r,value@l */
+}
+
+static int patch_li31(Uint32 *address, Uint32 value)
+{
+ if ((value >> 31) != 0)
+ return -1;
+ patch_imm16(address, value >> 16); /* addis r,0,value@h */
+ patch_imm16(address+1, value); /* ori r,r,value@l */
+}
+
+/* Patch the 64-bit immediate load of a fun entry's address at 'address'. */
+void hipe_patch_load_fe(Uint *address, Uint value)
+{
+    patch_li64((Uint32*)address, value);
+}
+
+/*
+ * Patch the immediate-load sequence at 'address' with 'value'.
+ * Closures and constants use the full 64-bit li64 sequence; atoms and
+ * C constants use the short 2-insn form and must fit in 31 bits.
+ * Returns 0 on success, -1 on an unknown type or unrepresentable value.
+ * NOTE(review): patch_li31() falls off its success path without a
+ * return value, so the am_atom/am_c_const result is unreliable until
+ * that is fixed.
+ */
+int hipe_patch_insn(void *address, Uint64 value, Eterm type)
+{
+    switch (type) {
+      case am_closure:
+      case am_constant:
+	patch_li64((Uint32*)address, value);
+	return 0;
+      case am_atom:
+      case am_c_const:
+	return patch_li31((Uint32*)address, value);
+      default:
+	return -1;
+    }
+}
+
+void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
+{
+ unsigned int *code;
+
+ if ((unsigned long)&nbif_callemu & ~0x01FFFFFCUL)
+ abort();
+
+ code = alloc_stub(7);
+
+ /* addis r12,0,beamAddress@highest */
+ code[0] = 0x3d800000 | (((unsigned long)beamAddress >> 48) & 0xffff);
+ /* ori r12,r12,beamAddress@higher */
+ code[1] = 0x618c0000 | (((unsigned long)beamAddress >> 32) & 0xffff);
+ /* sldi r12,r12,32 (rldicr r12,r12,32,31) */
+ code[2] = 0x798c07c6;
+ /* oris r12,r12,beamAddress@h */
+ code[3] = 0x658c0000 | (((unsigned long)beamAddress >> 16) & 0xffff);
+ /* ori r12,r12,beamAddress@l */
+ code[4] = 0x618c0000 | ((unsigned long)beamAddress & 0xffff);
+ /* addi r0,0,beamArity */
+ code[5] = 0x38000000 | (beamArity & 0x7FFF);
+ /* ba nbif_callemu */
+ code[6] = 0x48000002 | (unsigned long)&nbif_callemu;
+
+ hipe_flush_icache_range(code, 7*sizeof(int));
+
+ return code;
+}
+#else /* !__powerpc64__ */
/*
 * Loading a 32-bit immediate 'val' into Rd (Rd != R0) is done as:
 *
 *	addi  Rd, 0, val@l	// low 16 bits, sign-extended
 *	addis Rd, Rd, val@ha	// adjusted high 16 bits
 *
 * Because addi sign-extends its operand, a set bit 15 in 'val'
 * effectively subtracts 0x10000 from the register's high half;
 * val@ha compensates by adding 1 to the plain high halfword in
 * exactly that case.
 */
static unsigned int at_ha(unsigned int val)
{
    unsigned int rounded = val + 0x8000;	/* carries into the high half iff bit 15 is set */
    return (rounded >> 16) & 0xFFFF;
}
+
+/* Patch a 2-instruction 32-bit immediate load: the addi (low halfword,
+   sign-extended) at 'address' and the compensating addis (value@ha)
+   at address+1. */
+static void patch_li(Uint32 *address, Uint32 value)
+{
+    patch_imm16(address, value);
+    patch_imm16(address+1, at_ha(value));
+}
+
+/* Patch the immediate load of a fun entry's address (addi/addis pair). */
+void hipe_patch_load_fe(Uint32 *address, Uint value)
+{
+    patch_li(address, value);
+}
+
+int hipe_patch_insn(void *address, Uint32 value, Eterm type)
+{
+ switch (type) {
+ case am_closure:
+ case am_constant:
+ case am_atom:
+ case am_c_const:
+ break;
+ default:
+ return -1;
+ }
+ patch_li((Uint32*)address, value);
+ return 0;
+}
+
+/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2()
+ and hipe_bif0.c:hipe_make_stub() */
+void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
+{
+ unsigned int *code;
+
+ /*
+ * Native code calls BEAM via a stub looking as follows:
+ *
+ * addi r12,0,beamAddress@l
+ * addi r0,0,beamArity
+ * addis r12,r12,beamAddress@ha
+ * ba nbif_callemu
+ *
+ * I'm using r0 and r12 since the standard SVR4 ABI allows
+ * them to be modified during function linkage. Trampolines
+ * (for b/bl to distant targets) may modify r11.
+ *
+ * The runtime system code is linked completely below the
+ * 32MB address boundary. Hence the branch to nbif_callemu
+ * is done with a 'ba' instruction.
+ */
+
+ /* verify that 'ba' can reach nbif_callemu */
+ if ((unsigned long)&nbif_callemu & ~0x01FFFFFCUL)
+ abort();
+
+ code = alloc_stub(4);
+
+ /* addi r12,0,beamAddress@l */
+ code[0] = 0x39800000 | ((unsigned long)beamAddress & 0xFFFF);
+ /* addi r0,0,beamArity */
+ code[1] = 0x38000000 | (beamArity & 0x7FFF);
+ /* addis r12,r12,beamAddress@ha */
+ code[2] = 0x3D8C0000 | at_ha((unsigned long)beamAddress);
+ /* ba nbif_callemu */
+ code[3] = 0x48000002 | (unsigned long)&nbif_callemu;
+
+ hipe_flush_icache_range(code, 4*sizeof(int));
+
+ return code;
+}
+#endif /* !__powerpc64__ */
+
+/*
+ * Rewrite the 24-bit branch displacement (LI field, bits 2..25) of the
+ * I-form branch instruction at 'address', preserving the opcode field
+ * and the LK (link) bit.  'offset' is in words.  Pass AA=2 to set the
+ * AA (absolute address) bit, AA=0 for a relative branch.
+ */
+static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA)
+{
+    Uint32 oldI = *address;
+    Uint32 newI = (oldI & 0xFC000001) | ((offset & 0x00FFFFFF) << 2) | (AA & 2);
+    *address = newI;
+    hipe_flush_icache_word(address);
+}
+
+/*
+ * Patch the branch instruction at 'callAddress' to transfer control to
+ * 'destAddress', falling back to the given 'trampoline' when the
+ * target is out of direct-branch range.  Returns 0 on success, -1 if
+ * the destination is unreachable even via the trampoline.
+ */
+int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
+{
+    if ((Uint32)destAddress == ((Uint32)destAddress & 0x01FFFFFC)) {
+	/* The destination is in the [0,32MB[ range.
+	   We can reach it with a ba/bla instruction.
+	   This is the typical case for BIFs and primops.
+	   It's also common for trap-to-BEAM stubs (on ppc32). */
+	patch_b((Uint32*)callAddress, (Uint32)destAddress >> 2, 2);
+    } else {
+	Sint32 destOffset = ((Sint32)destAddress - (Sint32)callAddress) >> 2;
+	if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) {
+	    /* The destination is within a [-32MB,+32MB[ range from us.
+	       We can reach it with a b/bl instruction.
+	       This is typical for nearby Erlang code. */
+	    patch_b((Uint32*)callAddress, destOffset, 0);
+	} else {
+	    /* The destination is too distant for b/bl/ba/bla.
+	       Must do a b/bl to the trampoline. */
+	    Sint32 trampOffset = ((Sint32)trampoline - (Sint32)callAddress) >> 2;
+	    if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) {
+		/* Update the trampoline's address computation.
+		   (May be redundant, but we can't tell.) */
+#if defined(__powerpc64__)
+		/* This relies on the fact that we allocate code below 2GB. */
+		patch_li31((Uint32*)trampoline, (Uint32)destAddress);
+#else
+		patch_li((Uint32*)trampoline, (Uint32)destAddress);
+#endif
+		/* Update this call site. */
+		patch_b((Uint32*)callAddress, trampOffset, 0);
+	    } else
+		return -1;
+	}
+    }
+    return 0;
+}
+
+/* Debug helper: print the PPC-specific HiPE process-state fields
+   (struct offset, field name, raw value) one per line. */
+void hipe_arch_print_pcb(struct hipe_process_state *p)
+{
+#define U(n,x) \
+    printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "")
+    U("nra ", nra);
+    U("narity ", narity);
+#undef U
+}