diff options
Diffstat (limited to 'erts/emulator/hipe/hipe_amd64.c')
-rw-r--r-- | erts/emulator/hipe/hipe_amd64.c | 196 |
1 files changed, 52 insertions, 144 deletions
diff --git a/erts/emulator/hipe/hipe_amd64.c b/erts/emulator/hipe/hipe_amd64.c index b5dff06987..62739d2a78 100644 --- a/erts/emulator/hipe/hipe_amd64.c +++ b/erts/emulator/hipe/hipe_amd64.c @@ -1,18 +1,19 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2004-2011. All Rights Reserved. + * Copyright Ericsson AB 2004-2016. All Rights Reserved. * - * The contents of this file are subject to the Erlang Public License, - * Version 1.1, (the "License"); you may not use this file except in - * compliance with the License. You should have received a copy of the - * Erlang Public License along with this software. If not, it can be - * retrieved online at http://www.erlang.org/. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * Software distributed under the License is distributed on an "AS IS" - * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See - * the License for the specific language governing rights and limitations - * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. * * %CopyrightEnd% */ @@ -82,21 +83,6 @@ int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) return 0; } -/* - * Memory allocator for executable code. - * - * This is required on AMD64 because some Linux kernels - * (including 2.6.10-rc1 and newer www.kernel.org ones) - * default to non-executable memory mappings, causing - * ordinary malloc() memory to be non-executable. - * - * Implementing this properly also allows us to ensure that - * executable code ends up in the low 2GB of the address space, - * as required by HiPE/AMD64's small code model. - */ -static unsigned int code_bytes; -static char *code_next; - #if 0 /* change to non-zero to get allocation statistics at exit() */ static unsigned int total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs, nr_large, total_lost; static unsigned int atexit_done; @@ -120,100 +106,20 @@ static void atexit_alloc_code_stats(void) #define ALLOC_CODE_STATS(X) do{}while(0) #endif -/* FreeBSD 6.1 breakage */ -#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) -#define MAP_ANONYMOUS MAP_ANON -#endif - -static void morecore(unsigned int alloc_bytes) -{ - unsigned int map_bytes; - char *map_hint, *map_start; - - /* Page-align the amount to allocate. */ - map_bytes = (alloc_bytes + 4095) & ~4095; - - /* Round up small allocations. */ - if (map_bytes < 1024*1024) - map_bytes = 1024*1024; - else - ALLOC_CODE_STATS(++nr_large); - - /* Create a new memory mapping, ensuring it is executable - and in the low 2GB of the address space. Also attempt - to make it adjacent to the previous mapping. */ - map_hint = code_next + code_bytes; -#if !defined(MAP_32BIT) - /* FreeBSD doesn't have MAP_32BIT, and it doesn't respect - a plain map_hint (returns high mappings even though the - hint refers to a free area), so we have to use both map_hint - and MAP_FIXED to get addresses below the 2GB boundary. - This is even worse than the Linux/ppc64 case. - Similarly, Solaris 10 doesn't have MAP_32BIT, - and it doesn't respect a plain map_hint. */ - if (!map_hint) /* first call */ - map_hint = (char*)(512*1024*1024); /* 0.5GB */ -#endif - if ((unsigned long)map_hint & 4095) - abort(); - map_start = mmap(map_hint, map_bytes, - PROT_EXEC|PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS -#if defined(MAP_32BIT) - |MAP_32BIT -#elif defined(__FreeBSD__) || defined(__sun__) - |MAP_FIXED -#endif - , - -1, 0); - ALLOC_CODE_STATS(fprintf(stderr, "%s: mmap(%p,%u,...) == %p\r\n", __FUNCTION__, map_hint, map_bytes, map_start)); -#if !defined(MAP_32BIT) - if (map_start != MAP_FAILED && - (((unsigned long)map_start + (map_bytes-1)) & ~0x7FFFFFFFUL)) { - fprintf(stderr, "mmap with hint %p returned code memory %p\r\n", map_hint, map_start); - abort(); - } -#endif - if (map_start == MAP_FAILED) { - perror("mmap"); - abort(); - } - ALLOC_CODE_STATS(total_mapped += map_bytes); - - /* Merge adjacent mappings, so the trailing portion of the previous - mapping isn't lost. In practice this is quite successful. */ - if (map_start == map_hint) { - ALLOC_CODE_STATS(++nr_joins); - code_bytes += map_bytes; -#if !defined(MAP_32BIT) - if (!code_next) /* first call */ - code_next = map_start; -#endif - } else { - ALLOC_CODE_STATS(++nr_splits); - ALLOC_CODE_STATS(total_lost += code_bytes); - code_next = map_start; - code_bytes = map_bytes; - } - - ALLOC_CODE_STATS(atexit_alloc_code_stats()); -} - +/* + * Memory allocator for executable code. + * + * We use a dedicated allocator for executable code (from OTP 19.0) + * to make sure the memory we get is executable (PROT_EXEC) + * and to ensure that executable code ends up in the low 2GB + * of the address space, as required by HiPE/AMD64's small code model. + */ static void *alloc_code(unsigned int alloc_bytes) { - void *res; - - /* Align function entries. */ - alloc_bytes = (alloc_bytes + 3) & ~3; - - if (code_bytes < alloc_bytes) - morecore(alloc_bytes); ALLOC_CODE_STATS(++nr_allocs); ALLOC_CODE_STATS(total_alloc += alloc_bytes); - res = code_next; - code_next += alloc_bytes; - code_bytes -= alloc_bytes; - return res; + + return erts_alloc(ERTS_ALC_T_HIPE_EXEC, alloc_bytes); } void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) @@ -224,18 +130,18 @@ void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process * return alloc_code(nrbytes); } -/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2() - and hipe_bif0.c:hipe_make_stub() */ -void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity) +/* Make stub for native code calling exported beam function. +*/ +void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity) { /* * This creates a native code stub with the following contents: * - * movq $Address, P_BEAM_IP(%ebp) %% Actually two movl + * movq $Address, P_CALLEE_EXP(%ebp) %% Actually two movl * movb $Arity, P_ARITY(%ebp) * jmp callemu * - * The stub has variable size, depending on whether the P_BEAM_IP + * The stub has variable size, depending on whether the P_CALLEE_EXP * and P_ARITY offsets fit in 8-bit signed displacements or not. * The rel32 offset in the final jmp depends on its actual location, * which also depends on the size of the previous instructions. @@ -248,49 +154,51 @@ void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity) codeSize = /* 23, 26, 29, or 32 bytes */ 23 + /* 23 when all offsets are 8-bit */ - (P_BEAM_IP >= 128 ? 3 : 0) + - ((P_BEAM_IP + 4) >= 128 ? 3 : 0) + + (P_CALLEE_EXP >= 128 ? 3 : 0) + + ((P_CALLEE_EXP + 4) >= 128 ? 3 : 0) + (P_ARITY >= 128 ? 3 : 0); codep = code = alloc_code(codeSize); + if (!code) + return NULL; - /* movl $beamAddress, P_BEAM_IP(%ebp); 3 or 6 bytes, plus 4 */ + /* movl $callee_exp, P_CALLEE_EXP(%ebp); 3 or 6 bytes, plus 4 */ codep[0] = 0xc7; -#if P_BEAM_IP >= 128 +#if P_CALLEE_EXP >= 128 codep[1] = 0x85; /* disp32[EBP] */ - codep[2] = P_BEAM_IP & 0xFF; - codep[3] = (P_BEAM_IP >> 8) & 0xFF; - codep[4] = (P_BEAM_IP >> 16) & 0xFF; - codep[5] = (P_BEAM_IP >> 24) & 0xFF; + codep[2] = P_CALLEE_EXP & 0xFF; + codep[3] = (P_CALLEE_EXP >> 8) & 0xFF; + codep[4] = (P_CALLEE_EXP >> 16) & 0xFF; + codep[5] = (P_CALLEE_EXP >> 24) & 0xFF; codep += 6; #else codep[1] = 0x45; /* disp8[EBP] */ - codep[2] = P_BEAM_IP; + codep[2] = P_CALLEE_EXP; codep += 3; #endif - codep[0] = ((unsigned long)beamAddress ) & 0xFF; - codep[1] = ((unsigned long)beamAddress >> 8) & 0xFF; - codep[2] = ((unsigned long)beamAddress >> 16) & 0xFF; - codep[3] = ((unsigned long)beamAddress >> 24) & 0xFF; + codep[0] = ((unsigned long)callee_exp ) & 0xFF; + codep[1] = ((unsigned long)callee_exp >> 8) & 0xFF; + codep[2] = ((unsigned long)callee_exp >> 16) & 0xFF; + codep[3] = ((unsigned long)callee_exp >> 24) & 0xFF; codep += 4; - /* movl (shl 32 $beamAddress), P_BEAM_IP+4(%ebp); 3 or 6 bytes, plus 4 */ + /* movl (shl 32 $callee_exp), P_CALLEE_EXP+4(%ebp); 3 or 6 bytes, plus 4 */ codep[0] = 0xc7; -#if P_BEAM_IP+4 >= 128 +#if P_CALLEE_EXP+4 >= 128 codep[1] = 0x85; /* disp32[EBP] */ - codep[2] = (P_BEAM_IP+4) & 0xFF; - codep[3] = ((P_BEAM_IP+4) >> 8) & 0xFF; - codep[4] = ((P_BEAM_IP+4) >> 16) & 0xFF; - codep[5] = ((P_BEAM_IP+4) >> 24) & 0xFF; + codep[2] = (P_CALLEE_EXP+4) & 0xFF; + codep[3] = ((P_CALLEE_EXP+4) >> 8) & 0xFF; + codep[4] = ((P_CALLEE_EXP+4) >> 16) & 0xFF; + codep[5] = ((P_CALLEE_EXP+4) >> 24) & 0xFF; codep += 6; #else codep[1] = 0x45; /* disp8[EBP] */ - codep[2] = (P_BEAM_IP+4); + codep[2] = (P_CALLEE_EXP+4); codep += 3; #endif - codep[0] = ((unsigned long)beamAddress >> 32) & 0xFF; - codep[1] = ((unsigned long)beamAddress >> 40) & 0xFF; - codep[2] = ((unsigned long)beamAddress >> 48) & 0xFF; - codep[3] = ((unsigned long)beamAddress >> 56) & 0xFF; + codep[0] = ((unsigned long)callee_exp >> 32) & 0xFF; + codep[1] = ((unsigned long)callee_exp >> 40) & 0xFF; + codep[2] = ((unsigned long)callee_exp >> 48) & 0xFF; + codep[3] = ((unsigned long)callee_exp >> 56) & 0xFF; codep += 4; /* movb $beamArity, P_ARITY(%ebp); 3 or 6 bytes */ |