Diffstat (limited to 'erts/emulator/hipe/hipe_amd64.c')
-rw-r--r--  erts/emulator/hipe/hipe_amd64.c  196
1 file changed, 52 insertions(+), 144 deletions(-)
diff --git a/erts/emulator/hipe/hipe_amd64.c b/erts/emulator/hipe/hipe_amd64.c
index b5dff06987..62739d2a78 100644
--- a/erts/emulator/hipe/hipe_amd64.c
+++ b/erts/emulator/hipe/hipe_amd64.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2004-2011. All Rights Reserved.
+ * Copyright Ericsson AB 2004-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -82,21 +83,6 @@ int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
return 0;
}
-/*
- * Memory allocator for executable code.
- *
- * This is required on AMD64 because some Linux kernels
- * (including 2.6.10-rc1 and newer www.kernel.org ones)
- * default to non-executable memory mappings, causing
- * ordinary malloc() memory to be non-executable.
- *
- * Implementing this properly also allows us to ensure that
- * executable code ends up in the low 2GB of the address space,
- * as required by HiPE/AMD64's small code model.
- */
-static unsigned int code_bytes;
-static char *code_next;
-
#if 0 /* change to non-zero to get allocation statistics at exit() */
static unsigned int total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs, nr_large, total_lost;
static unsigned int atexit_done;
@@ -120,100 +106,20 @@ static void atexit_alloc_code_stats(void)
#define ALLOC_CODE_STATS(X) do{}while(0)
#endif
-/* FreeBSD 6.1 breakage */
-#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-static void morecore(unsigned int alloc_bytes)
-{
- unsigned int map_bytes;
- char *map_hint, *map_start;
-
- /* Page-align the amount to allocate. */
- map_bytes = (alloc_bytes + 4095) & ~4095;
-
- /* Round up small allocations. */
- if (map_bytes < 1024*1024)
- map_bytes = 1024*1024;
- else
- ALLOC_CODE_STATS(++nr_large);
-
- /* Create a new memory mapping, ensuring it is executable
- and in the low 2GB of the address space. Also attempt
- to make it adjacent to the previous mapping. */
- map_hint = code_next + code_bytes;
-#if !defined(MAP_32BIT)
- /* FreeBSD doesn't have MAP_32BIT, and it doesn't respect
- a plain map_hint (returns high mappings even though the
- hint refers to a free area), so we have to use both map_hint
- and MAP_FIXED to get addresses below the 2GB boundary.
- This is even worse than the Linux/ppc64 case.
- Similarly, Solaris 10 doesn't have MAP_32BIT,
- and it doesn't respect a plain map_hint. */
- if (!map_hint) /* first call */
- map_hint = (char*)(512*1024*1024); /* 0.5GB */
-#endif
- if ((unsigned long)map_hint & 4095)
- abort();
- map_start = mmap(map_hint, map_bytes,
- PROT_EXEC|PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS
-#if defined(MAP_32BIT)
- |MAP_32BIT
-#elif defined(__FreeBSD__) || defined(__sun__)
- |MAP_FIXED
-#endif
- ,
- -1, 0);
- ALLOC_CODE_STATS(fprintf(stderr, "%s: mmap(%p,%u,...) == %p\r\n", __FUNCTION__, map_hint, map_bytes, map_start));
-#if !defined(MAP_32BIT)
- if (map_start != MAP_FAILED &&
- (((unsigned long)map_start + (map_bytes-1)) & ~0x7FFFFFFFUL)) {
- fprintf(stderr, "mmap with hint %p returned code memory %p\r\n", map_hint, map_start);
- abort();
- }
-#endif
- if (map_start == MAP_FAILED) {
- perror("mmap");
- abort();
- }
- ALLOC_CODE_STATS(total_mapped += map_bytes);
-
- /* Merge adjacent mappings, so the trailing portion of the previous
- mapping isn't lost. In practice this is quite successful. */
- if (map_start == map_hint) {
- ALLOC_CODE_STATS(++nr_joins);
- code_bytes += map_bytes;
-#if !defined(MAP_32BIT)
- if (!code_next) /* first call */
- code_next = map_start;
-#endif
- } else {
- ALLOC_CODE_STATS(++nr_splits);
- ALLOC_CODE_STATS(total_lost += code_bytes);
- code_next = map_start;
- code_bytes = map_bytes;
- }
-
- ALLOC_CODE_STATS(atexit_alloc_code_stats());
-}
-
+/*
+ * Memory allocator for executable code.
+ *
+ * We use a dedicated allocator for executable code (from OTP 19.0)
+ * to make sure the memory we get is executable (PROT_EXEC)
+ * and to ensure that executable code ends up in the low 2GB
+ * of the address space, as required by HiPE/AMD64's small code model.
+ */
static void *alloc_code(unsigned int alloc_bytes)
{
- void *res;
-
- /* Align function entries. */
- alloc_bytes = (alloc_bytes + 3) & ~3;
-
- if (code_bytes < alloc_bytes)
- morecore(alloc_bytes);
ALLOC_CODE_STATS(++nr_allocs);
ALLOC_CODE_STATS(total_alloc += alloc_bytes);
- res = code_next;
- code_next += alloc_bytes;
- code_bytes -= alloc_bytes;
- return res;
+
+ return erts_alloc(ERTS_ALC_T_HIPE_EXEC, alloc_bytes);
}
void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
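
As a point of reference, the low-2GB constraint that the removed morecore() enforced on its mmap() result still has to hold for whatever ERTS_ALC_T_HIPE_EXEC hands back, since HiPE/AMD64's small code model requires it. A minimal standalone sketch of that check (not part of this patch; check_small_code_model is a hypothetical helper name):

#include <assert.h>

/* Sketch: the last byte of an executable-code block must stay below
 * the 2GB boundary, mirroring the check the removed morecore()
 * performed on the address returned by mmap(). */
static void check_small_code_model(void *block, unsigned long nbytes)
{
    assert((((unsigned long)block + (nbytes - 1)) & ~0x7FFFFFFFUL) == 0);
}
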
@@ -224,18 +130,18 @@ void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *
return alloc_code(nrbytes);
}
-/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2()
- and hipe_bif0.c:hipe_make_stub() */
-void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
+/* Make stub for native code calling exported beam function.
+*/
+void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity)
{
/*
* This creates a native code stub with the following contents:
*
- * movq $Address, P_BEAM_IP(%ebp) %% Actually two movl
+ * movq $Address, P_CALLEE_EXP(%ebp) %% Actually two movl
* movb $Arity, P_ARITY(%ebp)
* jmp callemu
*
- * The stub has variable size, depending on whether the P_BEAM_IP
+ * The stub has variable size, depending on whether the P_CALLEE_EXP
* and P_ARITY offsets fit in 8-bit signed displacements or not.
* The rel32 offset in the final jmp depends on its actual location,
* which also depends on the size of the previous instructions.
@@ -248,49 +154,51 @@ void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
codeSize = /* 23, 26, 29, or 32 bytes */
23 + /* 23 when all offsets are 8-bit */
- (P_BEAM_IP >= 128 ? 3 : 0) +
- ((P_BEAM_IP + 4) >= 128 ? 3 : 0) +
+ (P_CALLEE_EXP >= 128 ? 3 : 0) +
+ ((P_CALLEE_EXP + 4) >= 128 ? 3 : 0) +
(P_ARITY >= 128 ? 3 : 0);
codep = code = alloc_code(codeSize);
+ if (!code)
+ return NULL;
- /* movl $beamAddress, P_BEAM_IP(%ebp); 3 or 6 bytes, plus 4 */
+ /* movl $callee_exp, P_CALLEE_EXP(%ebp); 3 or 6 bytes, plus 4 */
codep[0] = 0xc7;
-#if P_BEAM_IP >= 128
+#if P_CALLEE_EXP >= 128
codep[1] = 0x85; /* disp32[EBP] */
- codep[2] = P_BEAM_IP & 0xFF;
- codep[3] = (P_BEAM_IP >> 8) & 0xFF;
- codep[4] = (P_BEAM_IP >> 16) & 0xFF;
- codep[5] = (P_BEAM_IP >> 24) & 0xFF;
+ codep[2] = P_CALLEE_EXP & 0xFF;
+ codep[3] = (P_CALLEE_EXP >> 8) & 0xFF;
+ codep[4] = (P_CALLEE_EXP >> 16) & 0xFF;
+ codep[5] = (P_CALLEE_EXP >> 24) & 0xFF;
codep += 6;
#else
codep[1] = 0x45; /* disp8[EBP] */
- codep[2] = P_BEAM_IP;
+ codep[2] = P_CALLEE_EXP;
codep += 3;
#endif
- codep[0] = ((unsigned long)beamAddress ) & 0xFF;
- codep[1] = ((unsigned long)beamAddress >> 8) & 0xFF;
- codep[2] = ((unsigned long)beamAddress >> 16) & 0xFF;
- codep[3] = ((unsigned long)beamAddress >> 24) & 0xFF;
+ codep[0] = ((unsigned long)callee_exp ) & 0xFF;
+ codep[1] = ((unsigned long)callee_exp >> 8) & 0xFF;
+ codep[2] = ((unsigned long)callee_exp >> 16) & 0xFF;
+ codep[3] = ((unsigned long)callee_exp >> 24) & 0xFF;
codep += 4;
- /* movl (shl 32 $beamAddress), P_BEAM_IP+4(%ebp); 3 or 6 bytes, plus 4 */
+ /* movl (shl 32 $callee_exp), P_CALLEE_EXP+4(%ebp); 3 or 6 bytes, plus 4 */
codep[0] = 0xc7;
-#if P_BEAM_IP+4 >= 128
+#if P_CALLEE_EXP+4 >= 128
codep[1] = 0x85; /* disp32[EBP] */
- codep[2] = (P_BEAM_IP+4) & 0xFF;
- codep[3] = ((P_BEAM_IP+4) >> 8) & 0xFF;
- codep[4] = ((P_BEAM_IP+4) >> 16) & 0xFF;
- codep[5] = ((P_BEAM_IP+4) >> 24) & 0xFF;
+ codep[2] = (P_CALLEE_EXP+4) & 0xFF;
+ codep[3] = ((P_CALLEE_EXP+4) >> 8) & 0xFF;
+ codep[4] = ((P_CALLEE_EXP+4) >> 16) & 0xFF;
+ codep[5] = ((P_CALLEE_EXP+4) >> 24) & 0xFF;
codep += 6;
#else
codep[1] = 0x45; /* disp8[EBP] */
- codep[2] = (P_BEAM_IP+4);
+ codep[2] = (P_CALLEE_EXP+4);
codep += 3;
#endif
- codep[0] = ((unsigned long)beamAddress >> 32) & 0xFF;
- codep[1] = ((unsigned long)beamAddress >> 40) & 0xFF;
- codep[2] = ((unsigned long)beamAddress >> 48) & 0xFF;
- codep[3] = ((unsigned long)beamAddress >> 56) & 0xFF;
+ codep[0] = ((unsigned long)callee_exp >> 32) & 0xFF;
+ codep[1] = ((unsigned long)callee_exp >> 40) & 0xFF;
+ codep[2] = ((unsigned long)callee_exp >> 48) & 0xFF;
+ codep[3] = ((unsigned long)callee_exp >> 56) & 0xFF;
codep += 4;
/* movb $beamArity, P_ARITY(%ebp); 3 or 6 bytes */
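
For clarity, the size arithmetic described in the stub comment above reads as: a fixed 23-byte stub, plus 3 extra bytes for every %ebp offset that needs a 32-bit displacement (ModRM byte 0x85, disp32[EBP]) instead of an 8-bit one (0x45, disp8[EBP]). A minimal sketch under those assumptions (stub_code_size and the *_off parameter names are illustrative, not part of the patch):

/* Sketch: reproduces the codeSize computation in hipe_make_native_stub().
 * The 64-bit callee_exp word is written by two movl stores 4 bytes apart,
 * and each offset >= 128 forces a disp32 encoding, costing 3 extra bytes. */
static unsigned int stub_code_size(unsigned int callee_exp_off,
                                   unsigned int arity_off)
{
    return 23                                     /* all offsets fit in disp8 */
        + (callee_exp_off     >= 128 ? 3 : 0)     /* first movl needs disp32  */
        + (callee_exp_off + 4 >= 128 ? 3 : 0)     /* second movl needs disp32 */
        + (arity_off          >= 128 ? 3 : 0);    /* movb needs disp32        */
}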