aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKostis Sagonas <[email protected]>2011-02-10 02:32:27 +0200
committerNiclas Axelsson <[email protected]>2011-02-14 17:27:49 +0100
commitdbba32bf7743b2ec5952c884ab9c7007db24d760 (patch)
treee42ede6e55c309143a582b78a3675598a981a135
parent60826a52cbed78fa3b30bacd6d37e1c7766f5d99 (diff)
downloadotp-dbba32bf7743b2ec5952c884ab9c7007db24d760.tar.gz
otp-dbba32bf7743b2ec5952c884ab9c7007db24d760.tar.bz2
otp-dbba32bf7743b2ec5952c884ab9c7007db24d760.zip
Changes for the PPC64 backend
-rw-r--r--erts/emulator/hipe/hipe_amd64.tab4
-rw-r--r--erts/emulator/hipe/hipe_ppc.c110
-rw-r--r--erts/emulator/hipe/hipe_ppc.h10
-rw-r--r--erts/emulator/hipe/hipe_ppc_asm.m416
-rw-r--r--erts/emulator/hipe/hipe_ppc_glue.S12
5 files changed, 75 insertions, 77 deletions
diff --git a/erts/emulator/hipe/hipe_amd64.tab b/erts/emulator/hipe/hipe_amd64.tab
index 8933f7a9e4..e039d74525 100644
--- a/erts/emulator/hipe/hipe_amd64.tab
+++ b/erts/emulator/hipe/hipe_amd64.tab
@@ -16,13 +16,9 @@
#
# %CopyrightEnd%
#
-
# AMD64-specific atoms and bifs
atom amd64
atom handle_fp_exception
atom inc_stack_0
atom sse2_fnegate_mask
-
-# bif hipe_bifs:write_s64/2
-bif hipe_bifs:write_u64/2
diff --git a/erts/emulator/hipe/hipe_ppc.c b/erts/emulator/hipe/hipe_ppc.c
index c641253ab5..bc25061a16 100644
--- a/erts/emulator/hipe/hipe_ppc.c
+++ b/erts/emulator/hipe/hipe_ppc.c
@@ -87,48 +87,6 @@ static struct segment {
#define MAP_ANONYMOUS MAP_ANON
#endif
-#if defined(__powerpc64__)
-static void *new_code_mapping(void)
-{
- char *map_hint, *map_start;
-
- /*
- * Allocate a new 32MB code segment in the low 2GB of the address space.
- *
- * This is problematic for several reasons:
- * - Linux/ppc64 lacks the MAP_32BIT flag that Linux/x86-64 has.
- * - The address space hint to mmap is only respected if that
- * area is available. If it isn't, then mmap falls back to its
- * defaults, which (according to testing) results in very high
- * (and thus useless for us) addresses being returned.
- * - Another mapping, presumably the brk, also occupies low addresses.
- *
- * As initial implementation, simply start allocating at the 0.5GB
- * boundary. This leaves plenty of space for the brk before malloc
- * needs to switch to mmap, while allowing for 1.5GB of code.
- *
- * A more robust implementation would be to parse /proc/self/maps,
- * reserve all available space between (say) 0.5GB and 2GB with
- * PROT_NONE MAP_NORESERVE mappings, and then allocate by releasing
- * 32MB segments and re-mapping them properly. This would work on
- * Linux/ppc64, I have no idea how things should be done on Darwin64.
- */
- if (curseg.base)
- map_hint = (char*)curseg.base + SEGMENT_NRBYTES;
- else
- map_hint = (char*)(512*1024*1024); /* 0.5GB */
- map_start = mmap(map_hint, SEGMENT_NRBYTES,
- PROT_EXEC|PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS,
- -1, 0);
- if (map_start != MAP_FAILED &&
- (((unsigned long)map_start + (SEGMENT_NRBYTES-1)) & ~0x7FFFFFFFUL)) {
- fprintf(stderr, "mmap with hint %p returned code memory %p\r\n", map_hint, map_start);
- abort();
- }
- return map_start;
-}
-#else
static void *new_code_mapping(void)
{
return mmap(0, SEGMENT_NRBYTES,
@@ -136,7 +94,6 @@ static void *new_code_mapping(void)
MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0);
}
-#endif
static int check_callees(Eterm callees)
{
@@ -182,20 +139,30 @@ static unsigned int *try_alloc(Uint nrwords, int nrcallees, Eterm callees, unsig
unsigned int a = unsigned_val(tuple_val(mfa)[3]);
unsigned int *trampoline = hipe_mfa_get_trampoline(m, f, a);
if (!in_area(trampoline, base, SEGMENT_NRBYTES)) {
+#if defined(__powerpc64__)
+ if (nrfreewords < 7)
+ return NULL;
+ nrfreewords -= 7;
+ tramp_pos = trampoline = tramp_pos - 7;
+ trampoline[0] = 0x3D600000; /* addis r11,r0,0 */
+ trampoline[1] = 0x616B0000; /* ori r11,r11,0 */
+ trampoline[2] = 0x796B07C6; /* rldicr r11,r11,32,31 */
+ trampoline[3] = 0x656B0000; /* oris r11,r11,0 */
+ trampoline[4] = 0x616B0000; /* ori r11,r11,0 */
+ trampoline[5] = 0x7D6903A6; /* mtctr r11 */
+ trampoline[6] = 0x4E800420; /* bctr */
+ hipe_flush_icache_range(trampoline, 7*sizeof(int));
+#else
if (nrfreewords < 4)
return NULL;
nrfreewords -= 4;
tramp_pos = trampoline = tramp_pos - 4;
-#if defined(__powerpc64__)
- trampoline[0] = 0x3D600000; /* addis r11,0,0 */
- trampoline[1] = 0x616B0000; /* ori r11,r11,0 */
-#else
trampoline[0] = 0x39600000; /* addi r11,r0,0 */
trampoline[1] = 0x3D6B0000; /* addis r11,r11,0 */
-#endif
trampoline[2] = 0x7D6903A6; /* mtctr r11 */
trampoline[3] = 0x4E800420; /* bctr */
hipe_flush_icache_range(trampoline, 4*sizeof(int));
+#endif
hipe_mfa_set_trampoline(m, f, a, trampoline);
}
trampvec[trampnr-1] = trampoline;
@@ -281,21 +248,22 @@ static void patch_imm16(Uint32 *address, unsigned int imm16)
}
#if defined(__powerpc64__)
+/*
+ * To load a 64-bit immediate value 'val' into Rd (Rd != R0):
+ *
+ * addis Rd, 0, val@highest // (val >> 48) & 0xFFFF
+ * ori Rd, Rd, val@higher // (val >> 32) & 0xFFFF
+ * rldicr Rd, Rd, 32, 31
+ * oris Rd, Rd, val@h // (val >> 16) & 0xFFFF
+ * ori Rd, Rd, val@l // val & 0xFFFF
+ */
static void patch_li64(Uint32 *address, Uint64 value)
{
- patch_imm16(address+0, value >> 48);/* addis r,0,value@highest */
- patch_imm16(address+1, value >> 32);/* ori r,r,value@higher */
- /* sldi r,r,32 */
- patch_imm16(address+3, value >> 16);/* oris r,r,value@h */
- patch_imm16(address+4, value); /* ori r,r,value@l */
-}
-
-static int patch_li31(Uint32 *address, Uint32 value)
-{
- if ((value >> 31) != 0)
- return -1;
- patch_imm16(address, value >> 16); /* addis r,0,value@h */
- patch_imm16(address+1, value); /* ori r,r,value@l */
+ patch_imm16(address+0, value >> 48);
+ patch_imm16(address+1, value >> 32);
+ /* rldicr Rd, Rd, 32, 31 */
+ patch_imm16(address+3, value >> 16);
+ patch_imm16(address+4, value);
}
void hipe_patch_load_fe(Uint *address, Uint value)
@@ -308,11 +276,10 @@ int hipe_patch_insn(void *address, Uint64 value, Eterm type)
switch (type) {
case am_closure:
case am_constant:
- patch_li64((Uint32*)address, value);
- return 0;
case am_atom:
case am_c_const:
- return patch_li31((Uint32*)address, value);
+ patch_li64((Uint32*)address, value);
+ return 0;
default:
return -1;
}
@@ -442,34 +409,33 @@ static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA)
int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
{
- if ((Uint32)destAddress == ((Uint32)destAddress & 0x01FFFFFC)) {
+ if ((UWord)destAddress == ((UWord)destAddress & 0x01FFFFFC)) {
/* The destination is in the [0,32MB[ range.
We can reach it with a ba/bla instruction.
This is the typical case for BIFs and primops.
It's also common for trap-to-BEAM stubs (on ppc32). */
- patch_b((Uint32*)callAddress, (Uint32)destAddress >> 2, 2);
+ patch_b((Uint32*)callAddress, (Sint32)destAddress >> 2, 2);
} else {
- Sint32 destOffset = ((Sint32)destAddress - (Sint32)callAddress) >> 2;
+ SWord destOffset = ((SWord)destAddress - (SWord)callAddress) >> 2;
if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) {
/* The destination is within a [-32MB,+32MB[ range from us.
We can reach it with a b/bl instruction.
This is typical for nearby Erlang code. */
- patch_b((Uint32*)callAddress, destOffset, 0);
+ patch_b((Uint32*)callAddress, (Sint32)destOffset, 0);
} else {
/* The destination is too distant for b/bl/ba/bla.
Must do a b/bl to the trampoline. */
- Sint32 trampOffset = ((Sint32)trampoline - (Sint32)callAddress) >> 2;
+ SWord trampOffset = ((SWord)trampoline - (SWord)callAddress) >> 2;
if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) {
/* Update the trampoline's address computation.
(May be redundant, but we can't tell.) */
#if defined(__powerpc64__)
- /* This relies on the fact that we allocate code below 2GB. */
- patch_li31((Uint32*)trampoline, (Uint32)destAddress);
+ patch_li64((Uint32*)trampoline, (Uint64)destAddress);
#else
patch_li((Uint32*)trampoline, (Uint32)destAddress);
#endif
/* Update this call site. */
- patch_b((Uint32*)callAddress, trampOffset, 0);
+ patch_b((Uint32*)callAddress, (Sint32)trampOffset, 0);
} else
return -1;
}
diff --git a/erts/emulator/hipe/hipe_ppc.h b/erts/emulator/hipe/hipe_ppc.h
index 5c3d12392d..66000c1846 100644
--- a/erts/emulator/hipe/hipe_ppc.h
+++ b/erts/emulator/hipe/hipe_ppc.h
@@ -44,12 +44,20 @@ static __inline__ int hipe_word32_address_ok(void *address)
return ((unsigned long)address & 0x3) == 0;
}
+#if defined(__powerpc64__)
+/* for hipe_bifs_{read,write}_{s,u}64 */
+static __inline__ int hipe_word64_address_ok(void *address)
+{
+ return ((unsigned long)address & 0x7) == 0;
+}
+#endif
+
/* Native stack growth direction. */
#define HIPE_NSTACK_GROWS_DOWN
#if defined(__powerpc64__)
#define hipe_arch_name am_ppc64
-#define AEXTERN(RET,NAME,PROTO) extern const int NAME
+#define AEXTERN(RET,NAME,PROTO) extern const int NAME[]
AEXTERN(void,hipe_ppc_inc_stack,(void));
#else
#define hipe_arch_name am_powerpc
diff --git a/erts/emulator/hipe/hipe_ppc_asm.m4 b/erts/emulator/hipe/hipe_ppc_asm.m4
index 3264c78df5..0eb5c441e6 100644
--- a/erts/emulator/hipe/hipe_ppc_asm.m4
+++ b/erts/emulator/hipe/hipe_ppc_asm.m4
@@ -62,15 +62,31 @@ ifelse(OPSYS,darwin,``
#define SEMI @
#define SET_SIZE(NAME) /*empty*/
#define TYPE_FUNCTION(NAME) /*empty*/
+#define OPD(NAME) /*empty*/
'',``
/* Not Darwin */''
`ifelse(ARCH,ppc64,``
/* 64-bit */
+/*
+ * The 64-bit PowerPC ABI requires us to setup Official Procedure Descriptors
+ * for functions called from C. These are exported as "func", while the entry
+ * point should is exported as ".func". A function pointer in C points to the
+ * function descriptor in the opd rather than to the function entry point.
+ */
#define JOIN(X,Y) X##Y
#define CSYM(NAME) JOIN(.,NAME)
+#define OPD(NAME) \
+ .pushsection .opd, "aw"; \
+ .align 3; \
+ .global NAME; \
+NAME: \
+ .quad CSYM(NAME), .TOC.@tocbase, 0; \
+ .type NAME, @function; \
+ .popsection
'',``
/* 32-bit */
#define CSYM(NAME) NAME
+#define OPD(NAME) /*empty*/
'')'
``#define ASYM(NAME) NAME
#define GLOBAL(NAME) .global NAME
diff --git a/erts/emulator/hipe/hipe_ppc_glue.S b/erts/emulator/hipe/hipe_ppc_glue.S
index dda2aca2ec..6f0217c738 100644
--- a/erts/emulator/hipe/hipe_ppc_glue.S
+++ b/erts/emulator/hipe/hipe_ppc_glue.S
@@ -198,6 +198,7 @@
* int hipe_ppc_call_to_native(Process *p);
* Emulated code recursively calls native code.
*/
+ OPD(hipe_ppc_call_to_native)
GLOBAL(CSYM(hipe_ppc_call_to_native))
CSYM(hipe_ppc_call_to_native):
/* save C context */
@@ -229,6 +230,7 @@ ASYM(nbif_return):
* int hipe_ppc_return_to_native(Process *p);
* Emulated code returns to its native code caller.
*/
+ OPD(hipe_ppc_return_to_native)
GLOBAL(CSYM(hipe_ppc_return_to_native))
CSYM(hipe_ppc_return_to_native):
/* save C context */
@@ -252,6 +254,7 @@ CSYM(hipe_ppc_return_to_native):
* int hipe_ppc_tailcall_to_native(Process *p);
* Emulated code tailcalls native code.
*/
+ OPD(hipe_ppc_tailcall_to_native)
GLOBAL(CSYM(hipe_ppc_tailcall_to_native))
CSYM(hipe_ppc_tailcall_to_native):
/* save C context */
@@ -274,6 +277,7 @@ CSYM(hipe_ppc_tailcall_to_native):
* int hipe_ppc_throw_to_native(Process *p);
* Emulated code throws an exception to its native code caller.
*/
+ OPD(hipe_ppc_throw_to_native)
GLOBAL(CSYM(hipe_ppc_throw_to_native))
CSYM(hipe_ppc_throw_to_native):
/* save C context */
@@ -455,6 +459,10 @@ ASYM(nbif_fail):
li r3, HIPE_MODE_SWITCH_RES_THROW
b .flush_exit /* no need to save RA */
+ OPD(nbif_0_gc_after_bif)
+ OPD(nbif_1_gc_after_bif)
+ OPD(nbif_2_gc_after_bif)
+ OPD(nbif_3_gc_after_bif)
GLOBAL(CSYM(nbif_0_gc_after_bif))
GLOBAL(CSYM(nbif_1_gc_after_bif))
GLOBAL(CSYM(nbif_2_gc_after_bif))
@@ -493,18 +501,22 @@ CSYM(nbif_3_gc_after_bif):
* The heap pointer was just read from P.
* TEMP_LR contains a copy of LR
*/
+ OPD(nbif_0_simple_exception)
GLOBAL(CSYM(nbif_0_simple_exception))
CSYM(nbif_0_simple_exception):
li r4, 0
b .nbif_simple_exception
+ OPD(nbif_1_simple_exception)
GLOBAL(CSYM(nbif_1_simple_exception))
CSYM(nbif_1_simple_exception):
li r4, 1
b .nbif_simple_exception
+ OPD(nbif_2_simple_exception)
GLOBAL(CSYM(nbif_2_simple_exception))
CSYM(nbif_2_simple_exception):
li r4, 2
b .nbif_simple_exception
+ OPD(nbif_3_simple_exception)
GLOBAL(CSYM(nbif_3_simple_exception))
CSYM(nbif_3_simple_exception):
li r4, 3