diff options
Diffstat (limited to 'erts/emulator/hipe')
88 files changed, 19095 insertions, 0 deletions
diff --git a/erts/emulator/hipe/TODO b/erts/emulator/hipe/TODO new file mode 100644 index 0000000000..624ab560e7 --- /dev/null +++ b/erts/emulator/hipe/TODO @@ -0,0 +1,30 @@ + + %CopyrightBegin% + + Copyright Ericsson AB 2004-2009. All Rights Reserved. + + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + %CopyrightEnd% + +PowerPC: +* create and use trampolines for long calls + +X86: + +SPARC: +* The inc_stack code saves more argument registers than + necessary: a C callee won't clobber %l or %i regs. +* Does noproc_primop_interface_N really need to save and + restore FCALLS/HP/RA/NSP around P-less primop calls? + (x86 doesn't save and restore HP in this interface.) + Ditto for nocons_nofail_primop_interface_0. diff --git a/erts/emulator/hipe/elf64ppc.x b/erts/emulator/hipe/elf64ppc.x new file mode 100644 index 0000000000..299eed8192 --- /dev/null +++ b/erts/emulator/hipe/elf64ppc.x @@ -0,0 +1,224 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* Default linker script, for normal executables */ +OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", + "elf64-powerpc") +OUTPUT_ARCH(powerpc:common64) +ENTRY(_start) +SEARCH_DIR("/mnt/archive/cross-ppc64/ppc64-unknown-linux/lib"); +/* Do we need any of these for elf? + __DYNAMIC = 0; */ +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + PROVIDE (__executable_start = 0x0180000); . = 0x01800000 + SIZEOF_HEADERS; + .interp : { *(.interp) } + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .rel.init : { *(.rel.init) } + .rela.init : { *(.rela.init) } + .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) } + .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) } + .rel.fini : { *(.rel.fini) } + .rela.fini : { *(.rela.fini) } + .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) } + .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) } + .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) } + .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) } + .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) } + .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) } + .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) } + .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) } + .rel.ctors : { *(.rel.ctors) } + .rela.ctors : { *(.rela.ctors) } + .rel.dtors : { *(.rel.dtors) } + .rela.dtors : { *(.rela.dtors) } + .rel.got : { *(.rel.got) } + .rela.got : { *(.rela.got) } + .rela.toc : { *(.rela.toc) } + .rel.sdata : { *(.rel.sdata .rel.sdata.* .rel.gnu.linkonce.s.*) } + .rela.sdata : { *(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*) } + .rel.sbss : { 
*(.rel.sbss .rel.sbss.* .rel.gnu.linkonce.sb.*) } + .rela.sbss : { *(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*) } + .rel.sdata2 : { *(.rel.sdata2 .rel.sdata2.* .rel.gnu.linkonce.s2.*) } + .rela.sdata2 : { *(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*) } + .rel.sbss2 : { *(.rel.sbss2 .rel.sbss2.* .rel.gnu.linkonce.sb2.*) } + .rela.sbss2 : { *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*) } + .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) } + .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) } + .rel.plt : { *(.rel.plt) } + .rela.plt : { *(.rela.plt) } + .rela.tocbss : { *(.rela.tocbss) } + .init : + { + KEEP (*(.init)) + } =0x60000000 + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + /* .gnu.warning sections are handled specially by elf32.em. */ + *(.gnu.warning) + *(.sfpr .glink) + } =0x60000000 + .fini : + { + KEEP (*(.fini)) + } =0x60000000 + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .sdata2 : { *(.sdata2 .sdata2.* .gnu.linkonce.s2.*) } + .sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) } + .eh_frame_hdr : { *(.eh_frame_hdr) } + /* Adjust the address for the data segment. We want to adjust up to + the same address within the page on the next page up. */ + . = ALIGN (0x10000) - ((0x10000 - .) & (0x10000 - 1)); . = DATA_SEGMENT_ALIGN (0x10000, 0x1000); + /* Ensure the __preinit_array_start label is properly aligned. We + could instead move the label definition inside the section, but + the linker would then create the section even if it turns out to + be empty, which isn't pretty. */ + . 
= ALIGN(64 / 8); + PROVIDE (__preinit_array_start = .); + .preinit_array : { *(.preinit_array) } + PROVIDE (__preinit_array_end = .); + PROVIDE (__init_array_start = .); + .init_array : { *(.init_array) } + PROVIDE (__init_array_end = .); + PROVIDE (__fini_array_start = .); + .fini_array : { *(.fini_array) } + PROVIDE (__fini_array_end = .); + .data : + { + *(.data .data.* .gnu.linkonce.d.*) + SORT(CONSTRUCTORS) + } + .data1 : { *(.data1) } + .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } + .eh_frame : { KEEP (*(.eh_frame)) } + .gcc_except_table : { *(.gcc_except_table) } + .toc1 ALIGN(8) : { *(.toc1) } + .opd ALIGN(8) : { KEEP (*(.opd)) } + .dynamic : { *(.dynamic) } + .ctors : + { + /* gcc uses crtbegin.o to find the start of + the constructors, so we make sure it is + first. Because this is a wildcard, it + doesn't matter if the user does not + actually link against crtbegin.o; the + linker won't look for a file to match a + wildcard. The wildcard also means that it + doesn't matter which directory crtbegin.o + is in. */ + KEEP (*crtbegin*.o(.ctors)) + /* We don't want to include the .ctor section from + from the crtend.o file until after the sorted ctors. + The .ctor section from the crtend file contains the + end of ctors marker and it must be last */ + KEEP (*(EXCLUDE_FILE (*crtend*.o ) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + } + .dtors : + { + KEEP (*crtbegin*.o(.dtors)) + KEEP (*(EXCLUDE_FILE (*crtend*.o ) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + } + .jcr : { KEEP (*(.jcr)) } + .got ALIGN(8) : { *(.got .toc) } + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. 
*/ + .sdata : + { + *(.sdata .sdata.* .gnu.linkonce.s.*) + } + _edata = .; + PROVIDE (edata = .); + __bss_start = .; + .tocbss ALIGN(8) : { *(.tocbss)} + .sbss : + { + PROVIDE (__sbss_start = .); + PROVIDE (___sbss_start = .); + *(.dynsbss) + *(.sbss .sbss.* .gnu.linkonce.sb.*) + *(.scommon) + PROVIDE (__sbss_end = .); + PROVIDE (___sbss_end = .); + } + .plt : { *(.plt) } + .bss : + { + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + *(COMMON) + /* Align here to ensure that the .bss section occupies space up to + _end. Align after .bss to ensure correct alignment even if the + .bss section disappears because there are no input sections. */ + . = ALIGN(64 / 8); + } + . = ALIGN(64 / 8); + _end = .; + PROVIDE (end = .); + . = DATA_SEGMENT_END (.); + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. 
*/ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /DISCARD/ : { *(.note.GNU-stack) } +} diff --git a/erts/emulator/hipe/hipe_abi.txt b/erts/emulator/hipe/hipe_abi.txt new file mode 100644 index 0000000000..aea30d262d --- /dev/null +++ b/erts/emulator/hipe/hipe_abi.txt @@ -0,0 +1,72 @@ + + %CopyrightBegin% + %CopyrightEnd% + +$Id$ + +HiPE ABI +======== +This document describes aspects of HiPE's runtime system +that are common for all supported architectures. + +Calling Convention +------------------ +The first NR_ARG_REGS parameters (an architecture parameter) +are passed in registers. +Remaining parameters are pushed on the stack, in left-to-right order. +Left-to-right order is used to cater for the BEAM interpreter's +calling convention for closures. + +The callee deallocates the stacked actual parameters from the stack +before returning. This is required for correct implementation of +tailcalls. + +Stack Descriptors +----------------- +For each native code call site there is a stack descriptor which +describes certain static properties of that call: +- The call site's return address, used as key for lookups. +- The caller's local exception handler code address, if present. 
+- The caller's (fixed) frame size, in words. +- The set of live and traceable words in the caller's frame. +- The caller's arity. If f/N recursively calls g/M, then the + call site's arity is N, not M. (M is not a function of the + return address, due to the presence of tailcalls.) + +Exceptions +---------- +A recursive call occurring within the scope of a local exception +handler is indicated by having a stack descriptor with a non-NULL +exception handler code address. + +If an exception is thrown, the runtime system will unwind the native +stack one frame at a time, using the stack descriptors associated +with each frame's return address. + +When a frame with an active exception handler is found, the stack +pointer is reset to the low address of the fixed portion of that frame, +and a branch is made to the handler. + +Garbage Collection Interface +---------------------------- +[gc-points are call sites. each call site has a stack descriptor. +the descriptor allows the gc to traverse the stack and to find +all live Erlang terms.] + +BIFs +---- +C BIFs are called on the C stack, not the current native stack. + +A C BIF returns a single tagged Erlang value. To indicate an +exceptional condition, it puts an error code in p->freason +and returns THE_NON_VALUE (zero, except in debug mode). + +If p->freason == TRAP, then the BIF redirects its call to some +other function, given by p->def_arg_reg[]. +The BIF and the new callee may have different arities. + +The "hipe_${ARCH}_bifs.m4" macro files take care of these issues +by automatically generating assembly code which performs the +necessary stack switching, parameter copying, and checking for +and handling of exceptional conditions. To compiled Erlang code, +a call to a C BIF looks like an ordinary function call. 
diff --git a/erts/emulator/hipe/hipe_amd64.c b/erts/emulator/hipe/hipe_amd64.c new file mode 100644 index 0000000000..ff87492f4d --- /dev/null +++ b/erts/emulator/hipe/hipe_amd64.c @@ -0,0 +1,376 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#include <stddef.h> /* offsetof() */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "global.h" +#include <sys/mman.h> +#include "error.h" +#include "bif.h" +#include "big.h" /* term_to_Sint() */ + +#include "hipe_arch.h" +#include "hipe_bif0.h" +#include "hipe_native_bif.h" /* nbif_callemu() */ + +#undef F_TIMO +#undef THE_NON_VALUE +#undef ERL_FUN_SIZE +#include "hipe_literals.h" + +const Uint sse2_fnegate_mask[2] = {0x8000000000000000,0}; + +void hipe_patch_load_fe(Uint64 *address, Uint64 value) +{ + /* address points to an imm64 operand */ + *address = value; + hipe_flush_icache_word(address); +} + +int hipe_patch_insn(void *address, Uint64 value, Eterm type) +{ + switch (type) { + case am_closure: + case am_constant: + *(Uint64*)address = value; + break; + case am_c_const: + case am_atom: + /* check that value fits in an unsigned imm32 */ + /* XXX: are we sure it's not really a signed imm32? 
*/ + if ((Uint)(Uint32)value != value) + return -1; + *(Uint32*)address = (Uint32)value; + break; + default: + return -1; + } + hipe_flush_icache_word(address); + return 0; +} + +int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) +{ + Sint rel32; + + if (trampoline) + return -1; + rel32 = (Sint)destAddress - (Sint)callAddress - 4; + if ((Sint)(Sint32)rel32 != rel32) + return -1; + *(Uint32*)callAddress = (Uint32)rel32; + hipe_flush_icache_word(callAddress); + return 0; +} + +/* + * Memory allocator for executable code. + * + * This is required on AMD64 because some Linux kernels + * (including 2.6.10-rc1 and newer www.kernel.org ones) + * default to non-executable memory mappings, causing + * ordinary malloc() memory to be non-executable. + * + * Implementing this properly also allows us to ensure that + * executable code ends up in the low 2GB of the address space, + * as required by HiPE/AMD64's small code model. + */ +static unsigned int code_bytes; +static char *code_next; + +#if 0 /* change to non-zero to get allocation statistics at exit() */ +static unsigned int total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs, nr_large, total_lost; +static unsigned int atexit_done; + +static void alloc_code_stats(void) +{ + printf("\r\nalloc_code_stats: %u bytes mapped, %u joins, %u splits, %u bytes allocated, %u average alloc, %u large allocs, %u bytes lost\r\n", + total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs ? 
total_alloc/nr_allocs : 0, nr_large, total_lost); +} + +static void atexit_alloc_code_stats(void) +{ + if (!atexit_done) { + atexit_done = 1; + (void)atexit(alloc_code_stats); + } +} + +#define ALLOC_CODE_STATS(X) do{X;}while(0) +#else +#define ALLOC_CODE_STATS(X) do{}while(0) +#endif + +/* FreeBSD 6.1 breakage */ +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif + +static void morecore(unsigned int alloc_bytes) +{ + unsigned int map_bytes; + char *map_hint, *map_start; + + /* Page-align the amount to allocate. */ + map_bytes = (alloc_bytes + 4095) & ~4095; + + /* Round up small allocations. */ + if (map_bytes < 1024*1024) + map_bytes = 1024*1024; + else + ALLOC_CODE_STATS(++nr_large); + + /* Create a new memory mapping, ensuring it is executable + and in the low 2GB of the address space. Also attempt + to make it adjacent to the previous mapping. */ + map_hint = code_next + code_bytes; +#if !defined(MAP_32BIT) + /* FreeBSD doesn't have MAP_32BIT, and it doesn't respect + a plain map_hint (returns high mappings even though the + hint refers to a free area), so we have to use both map_hint + and MAP_FIXED to get addresses below the 2GB boundary. + This is even worse than the Linux/ppc64 case. + Similarly, Solaris 10 doesn't have MAP_32BIT, + and it doesn't respect a plain map_hint. */ + if (!map_hint) /* first call */ + map_hint = (char*)(512*1024*1024); /* 0.5GB */ +#endif + if ((unsigned long)map_hint & 4095) + abort(); + map_start = mmap(map_hint, map_bytes, + PROT_EXEC|PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS +#if defined(MAP_32BIT) + |MAP_32BIT +#elif defined(__FreeBSD__) || defined(__sun__) + |MAP_FIXED +#endif + , + -1, 0); + ALLOC_CODE_STATS(fprintf(stderr, "%s: mmap(%p,%u,...) 
== %p\r\n", __FUNCTION__, map_hint, map_bytes, map_start)); +#if !defined(MAP_32BIT) + if (map_start != MAP_FAILED && + (((unsigned long)map_start + (map_bytes-1)) & ~0x7FFFFFFFUL)) { + fprintf(stderr, "mmap with hint %p returned code memory %p\r\n", map_hint, map_start); + abort(); + } +#endif + if (map_start == MAP_FAILED) { + perror("mmap"); + abort(); + } + ALLOC_CODE_STATS(total_mapped += map_bytes); + + /* Merge adjacent mappings, so the trailing portion of the previous + mapping isn't lost. In practice this is quite successful. */ + if (map_start == map_hint) { + ALLOC_CODE_STATS(++nr_joins); + code_bytes += map_bytes; +#if !defined(MAP_32BIT) + if (!code_next) /* first call */ + code_next = map_start; +#endif + } else { + ALLOC_CODE_STATS(++nr_splits); + ALLOC_CODE_STATS(total_lost += code_bytes); + code_next = map_start; + code_bytes = map_bytes; + } + + ALLOC_CODE_STATS(atexit_alloc_code_stats()); +} + +static void *alloc_code(unsigned int alloc_bytes) +{ + void *res; + + /* Align function entries. */ + alloc_bytes = (alloc_bytes + 3) & ~3; + + if (code_bytes < alloc_bytes) + morecore(alloc_bytes); + ALLOC_CODE_STATS(++nr_allocs); + ALLOC_CODE_STATS(total_alloc += alloc_bytes); + res = code_next; + code_next += alloc_bytes; + code_bytes -= alloc_bytes; + return res; +} + +void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) +{ + if (is_not_nil(callees)) + return NULL; + *trampolines = NIL; + return alloc_code(nrbytes); +} + +/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2() + and hipe_bif0.c:hipe_make_stub() */ +void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity) +{ + /* + * This creates a native code stub with the following contents: + * + * movq $Address, P_BEAM_IP(%ebp) %% Actually two movl + * movb $Arity, P_ARITY(%ebp) + * jmp callemu + * + * The stub has variable size, depending on whether the P_BEAM_IP + * and P_ARITY offsets fit in 8-bit signed displacements or not. 
+ * The rel32 offset in the final jmp depends on its actual location, + * which also depends on the size of the previous instructions. + * Arity is stored with a movb because (a) Björn tells me arities + * are <= 255, and (b) a movb is smaller and faster than a movl. + */ + unsigned int codeSize; + unsigned char *code, *codep; + unsigned int callEmuOffset; + + codeSize = /* 23, 26, 29, or 32 bytes */ + 23 + /* 23 when all offsets are 8-bit */ + (P_BEAM_IP >= 128 ? 3 : 0) + + ((P_BEAM_IP + 4) >= 128 ? 3 : 0) + + (P_ARITY >= 128 ? 3 : 0); + codep = code = alloc_code(codeSize); + + /* movl $beamAddress, P_BEAM_IP(%ebp); 3 or 6 bytes, plus 4 */ + codep[0] = 0xc7; +#if P_BEAM_IP >= 128 + codep[1] = 0x85; /* disp32[EBP] */ + codep[2] = P_BEAM_IP & 0xFF; + codep[3] = (P_BEAM_IP >> 8) & 0xFF; + codep[4] = (P_BEAM_IP >> 16) & 0xFF; + codep[5] = (P_BEAM_IP >> 24) & 0xFF; + codep += 6; +#else + codep[1] = 0x45; /* disp8[EBP] */ + codep[2] = P_BEAM_IP; + codep += 3; +#endif + codep[0] = ((unsigned long)beamAddress ) & 0xFF; + codep[1] = ((unsigned long)beamAddress >> 8) & 0xFF; + codep[2] = ((unsigned long)beamAddress >> 16) & 0xFF; + codep[3] = ((unsigned long)beamAddress >> 24) & 0xFF; + codep += 4; + + /* movl (shl 32 $beamAddress), P_BEAM_IP+4(%ebp); 3 or 6 bytes, plus 4 */ + codep[0] = 0xc7; +#if P_BEAM_IP+4 >= 128 + codep[1] = 0x85; /* disp32[EBP] */ + codep[2] = (P_BEAM_IP+4) & 0xFF; + codep[3] = ((P_BEAM_IP+4) >> 8) & 0xFF; + codep[4] = ((P_BEAM_IP+4) >> 16) & 0xFF; + codep[5] = ((P_BEAM_IP+4) >> 24) & 0xFF; + codep += 6; +#else + codep[1] = 0x45; /* disp8[EBP] */ + codep[2] = (P_BEAM_IP+4); + codep += 3; +#endif + codep[0] = ((unsigned long)beamAddress >> 32) & 0xFF; + codep[1] = ((unsigned long)beamAddress >> 40) & 0xFF; + codep[2] = ((unsigned long)beamAddress >> 48) & 0xFF; + codep[3] = ((unsigned long)beamAddress >> 56) & 0xFF; + codep += 4; + + /* movb $beamArity, P_ARITY(%ebp); 3 or 6 bytes */ + codep[0] = 0xc6; +#if P_ARITY >= 128 + codep[1] = 0x85; /* 
disp32[EBP] */ + codep[2] = P_ARITY & 0xFF; + codep[3] = (P_ARITY >> 8) & 0xFF; + codep[4] = (P_ARITY >> 16) & 0xFF; + codep[5] = (P_ARITY >> 24) & 0xFF; + codep += 6; +#else + codep[1] = 0x45; /* disp8[EBP] */ + codep[2] = P_ARITY; + codep += 3; +#endif + codep[0] = beamArity; + codep += 1; + + /* jmp callemu; 5 bytes */ + callEmuOffset = (unsigned char*)nbif_callemu - (code + codeSize); + codep[0] = 0xe9; + codep[1] = callEmuOffset & 0xFF; + codep[2] = (callEmuOffset >> 8) & 0xFF; + codep[3] = (callEmuOffset >> 16) & 0xFF; + codep[4] = (callEmuOffset >> 24) & 0xFF; + codep += 5; + + ASSERT(codep == code + codeSize); + + /* I-cache flush? */ + + return code; +} + +void hipe_arch_print_pcb(struct hipe_process_state *p) +{ +#define U(n,x) \ + printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "") + U("ncsp ", ncsp); + U("narity ", narity); +#undef U +} + +/* + * XXX: The following should really be moved to a generic hipe_bifs_64 file. 
+ */ + +#if 0 /* unused */ +static int term_to_Sint64(Eterm term, Sint64 *sp) +{ + return term_to_Sint(term, sp); +} + +BIF_RETTYPE hipe_bifs_write_s64_2(BIF_ALIST_2) +{ + Sint64 *address; + Sint64 value; + + address = term_to_address(BIF_ARG_1); + if (!address || !hipe_word64_address_ok(address)) + BIF_ERROR(BIF_P, BADARG); + if (!term_to_Sint64(BIF_ARG_2, &value)) + BIF_ERROR(BIF_P, BADARG); + *address = value; + BIF_RET(NIL); +} +#endif + +BIF_RETTYPE hipe_bifs_write_u64_2(BIF_ALIST_2) +{ + Uint64 *address; + Uint64 value; + + address = term_to_address(BIF_ARG_1); + if (!address || !hipe_word64_address_ok(address)) + BIF_ERROR(BIF_P, BADARG); + if (!term_to_Uint(BIF_ARG_2, &value)) + BIF_ERROR(BIF_P, BADARG); + *address = value; + hipe_flush_icache_word(address); + BIF_RET(NIL); +} diff --git a/erts/emulator/hipe/hipe_amd64.h b/erts/emulator/hipe/hipe_amd64.h new file mode 100644 index 0000000000..532d47c092 --- /dev/null +++ b/erts/emulator/hipe/hipe_amd64.h @@ -0,0 +1,37 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_AMD64_H +#define HIPE_AMD64_H + +#include "hipe_x86.h" +#undef hipe_arch_name + +/* for hipe_bifs_{read,write}_{s,u}64 */ +static __inline__ int hipe_word64_address_ok(void *address) +{ + return 1; +} + +#define hipe_arch_name am_amd64 + +extern const Uint sse2_fnegate_mask[]; + +#endif /* HIPE_AMD64_H */ diff --git a/erts/emulator/hipe/hipe_amd64.tab b/erts/emulator/hipe/hipe_amd64.tab new file mode 100644 index 0000000000..3787bbf23b --- /dev/null +++ b/erts/emulator/hipe/hipe_amd64.tab @@ -0,0 +1,28 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2004-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +# $Id$ +# AMD64-specific atoms and bifs + +atom amd64 +atom handle_fp_exception +atom inc_stack_0 +atom sse2_fnegate_mask + +# bif hipe_bifs:write_s64/2 +bif hipe_bifs:write_u64/2 diff --git a/erts/emulator/hipe/hipe_amd64_abi.txt b/erts/emulator/hipe/hipe_amd64_abi.txt new file mode 100644 index 0000000000..27beff4ea2 --- /dev/null +++ b/erts/emulator/hipe/hipe_amd64_abi.txt @@ -0,0 +1,150 @@ + + %CopyrightBegin% + %CopyrightEnd% + +$Id$ + +HiPE AMD64 ABI +============== +This document describes aspects of HiPE's runtime system +that are specific for the AMD64 (x86-64) architecture. + +Register Usage +-------------- +%rsp and %rbp are fixed and must be preserved by calls (callee-save). 
+%rax, %rbx, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r10, %r11, %r12, %r13, %r14 +are clobbered by calls (caller-save). +%r15 is a fixed global register (unallocatable). + +%rsp is the native code stack pointer, growing towards lower addresses. +%rbp (aka P) is the current process' "Process*". +%r15 (aka HP) is the current process' heap pointer. (If HP_IN_R15 is true.) + +Notes: +- C/AMD64 16-byte aligns %rsp, presumably for SSE and signal handling. + HiPE/AMD64 does not need that, so our %rsp is only 8-byte aligned. +- HiPE/x86 uses %esi for HP, but C/AMD64 uses %rsi for parameter passing, + so HiPE/AMD64 should not use %rsi for HP. +- Using %r15 for HP requires a REX instruction prefix, but performing + 64-bit stores needs one anyway, so the only REX-prefix overhead + occurs when incrementing or copying HP [not true (we need REX for 64 + bit add and mov too); only overhead is when accessing floats on the + heap /Luna]. +- XXX: HiPE/x86 could just as easily use %ebx for HP. HiPE/AMD64 could use + %rbx, but the performance impact is probably minor. Try&measure? +- XXX: Cache SP_LIMIT, HP_LIMIT, and FCALLS in registers? Try&measure. + +Calling Convention +------------------ +Same as in the HiPE/x86 ABI, with the following adjustments: + +The first NR_ARG_REGS (a tunable parameter between 0 and 6, inclusive) +parameters are passed in %rsi, %rdx, %rcx, %r8, %r9, and %rdi. + +The first return value from a function is placed in %rax, the second +(if any) is placed in %rdx. + +Notes: +- Currently, NR_ARG_REGS==0. +- C BIFs expect P in C parameter register 1: %rdi. By making Erlang + parameter registers 1-5 coincide with C parameter registers 2-6, + our BIF wrappers can simply move P to %rdi without having to shift + the remaining parameter registers. +- A few primop calls target C functions that do not take a P parameter. + For these, the code generator should have a "ccall" instruction which + passes parameters starting with %rdi instead of %rsi. 
+- %rdi can still be used for Erlang parameter passing. The BIF wrappers + will push it to the C stack, but \emph{parameter \#6 would have been + pushed anyway}, so there is no additional overhead. +- We could pass more parameters in %rax, %rbx, %r10, %r11, %r12, %r13, + and %r14. However: + * we may need a scratch register for distant call trampolines + * using >6 argument registers complicates the mode-switch interface + (needs hacks and special-case optimisations) + * it is questionable whether using more than 6 improves performance; + it may be better to just cache more P state in registers + +Instruction Encoding / Code Model +--------------------------------- +AMD64 maintains x86's limit of <= 32 bits for PC-relative offsets +in call and jmp instructions. HiPE/AMD64 handles this as follows: +- The compiler emits ordinary call/jmp instructions for + recursive calls and tailcalls. +- The runtime system code is loaded into the low 32 bits of the + address space. (C/AMD64 small or medium code model.) By using mmap() + with the MAP_32BIT flag when allocating memory for code, all + code will be in the low 32 bits of the address space, and hence + no trampolines will be necessary. + +When generating code for non-immediate literals (boxed objects in +the constants pool), the code generator should use AMD64's new +instruction for loading a 64-bit immediate into a register: +mov reg,imm with a rex prefix. + +Notes: +- The loader/linker could redirect a distant call (where the offset + does not fit in a 32-bit signed immediate) to a linker-generated + trampoline. However, managing trampolines requires changes in the + loaders and possibly also the object code format, since the trampoline + must be close to the call site, which implies that code and its + trampolines must be created as a unit. This is the better long-term + solution, not just for AMD64 but also for SPARC32 and PowerPC, + both of which have similar problems. 
+- The constants pool could also be restricted to the low 32 bits of + the address space. However: + * We want to move away from a single constants pool. With multiple + areas, the address space restriction may be unrealistic. + * Creating the address of a literal is an infrequent operation, so + the performance impact of using 64-bit immediates should be minor. + +Stack Frame Layout +Garbage Collection Interface +BIFs +Stacks and Unix Signal Handlers +------------------------------- +Same as in the HiPE/x86 ABI. + + +Standard C/AMD64 Calling Conventions +==================================== +See <http://www.x86-64.org/abi.pdf>. + +%rax, %rdx, %rcx, %rsi, %rdi, %r8, %r9, %r10, %r11 are clobbered by calls (caller-save) +%rsp, %rbp, %rbx, %r12, %r13, %r14, %r15 are preserved by calls (callee-save) +[note: %rsi and %rdi are calleR-save, not calleE-save as in the x86 ABI] +%rsp is the stack pointer (fixed). It is required that ((%rsp+8) & 15) == 0 +when a function is entered. (Section 3.2.2 in the ABI document.) +%rbp is optional frame pointer or local variable +The first six integer parameters are passed in %rdi, %rsi, %rdx, %rcx, %r8, and %r9. +Remaining integer parameters are pushed right-to-left on the stack. +When calling a variadic function, %rax (%al actually) must contain an upper +bound on the number of SSE parameter registers, 0-8 inclusive. +%r10 is used for passing a function's static chain pointer. +%r11 is available for PLT code when computing the target address. +The first integer return value is put in %rax, the second (for __int128) in %rdx. +A memory return value (exact definition is complicated, but basically "large struct"), +is implemented as follows: the caller passes a pointer in %rdi as a hidden first +parameter, the callee stores the result there and returns this pointer in %rax. +The caller deallocates stacked parameters after return (addq $N, %rsp). 
+ +Windows 64-bit C Calling Conventions +==================================== +See "Calling Convention for x64 64-Bit Environments" on MSDN. + +%rax, %rcx, %rdx, %r8, %r9, %r10, %r11 are clobbered by calls (caller-save). +%rsp, %rbp, %rbx, %rsi, %rdi, %r12, %r13, %r14, %r15 are preserved +by calls (callee-save). +[Note: %rsi and %rdi are calleE-save, not calleR-save as in the Linux/Solaris ABI] +%rsp is the stack pointer (fixed). %rsp & 15 should be 0 at all times, +except at the start of a function's prologue when ((%rsp+8) & 15) == 0. +Leaf functions may leave (%rsp & 15) != 0. +The first four integer parameters are passed in %rcx, %rdx, %r8, and %r9. +Remaining integer parameters are pushed right-to-left on the stack, +starting at the fifth slot above the caller's stack pointer. +The bottom of the caller's frame must contain 4 slots where the callee +can save the four integer parameter registers, even if fewer than 4 +parameters are passed in registers. +An integer return value is put in %rax. Large integers (__m128), floats, +and doubles are returned in %xmm0. Larger return values cause the caller +to pass a pointer to a result buffer in %rcx as a hidden first parameter. +The caller may deallocate stacked parameters after return (addq $N, %rsp). diff --git a/erts/emulator/hipe/hipe_amd64_asm.m4 b/erts/emulator/hipe/hipe_amd64_asm.m4 new file mode 100644 index 0000000000..9ce9b4fc5b --- /dev/null +++ b/erts/emulator/hipe/hipe_amd64_asm.m4 @@ -0,0 +1,244 @@ +changecom(`/*', `*/')dnl +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/.
+ * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * $Id$ + */ +`#ifndef HIPE_AMD64_ASM_H +#define HIPE_AMD64_ASM_H' + +dnl +dnl Tunables. +dnl +define(LEAF_WORDS,24)dnl number of stack words for leaf functions +define(NR_ARG_REGS,4)dnl admissible values are 0 to 6, inclusive +define(HP_IN_REGISTER,1)dnl 1 to reserve a global register for HP +define(FCALLS_IN_REGISTER,0)dnl 1 to reserve global register for FCALLS +define(HEAP_LIMIT_IN_REGISTER,0)dnl global for HL +define(SIMULATE_NSP,0)dnl change to 1 to simulate call/ret insns + +`#define AMD64_LEAF_WORDS 'LEAF_WORDS +`#define LEAF_WORDS 'LEAF_WORDS + +/* + * Reserved registers. + */ +`#define P %rbp' + +`#define AMD64_HP_IN_REGISTER 'HP_IN_REGISTER +`#if AMD64_HP_IN_REGISTER +#define AMD64_HEAP_POINTER 15' +define(HP,%r15)dnl Only change this together with above +`#define SAVE_HP movq 'HP`, P_HP(P) +#define RESTORE_HP movq P_HP(P), 'HP` +#else +#define SAVE_HP /*empty*/ +#define RESTORE_HP /*empty*/ +#endif' + +`#define AMD64_FCALLS_IN_REGISTER 'FCALLS_IN_REGISTER +`#if AMD64_FCALLS_IN_REGISTER +#define AMD64_FCALLS_REGISTER 11' +define(FCALLS,%r11)dnl This goes together with line above +`#define SAVE_FCALLS movq 'FCALLS`, P_FCALLS(P) +#define RESTORE_FCALLS movq P_FCALLS(P), 'FCALLS` +#else +#define SAVE_FCALLS /*empty*/ +#define RESTORE_FCALLS /*empty*/ +#endif' + +`#define AMD64_HEAP_LIMIT_IN_REGISTER 'HEAP_LIMIT_IN_REGISTER +`#if AMD64_HEAP_LIMIT_IN_REGISTER +#define AMD64_HEAP_LIMIT_REGISTER 12' +define(HEAP_LIMIT,%r12)dnl Change this together with line above +`#define RESTORE_HEAP_LIMIT movq P_HP_LIMIT(P), 'HEAP_LIMIT` +#else +#define RESTORE_HEAP_LIMIT /*empty*/ +#endif' + +define(NSP,%rsp)dnl +`#define NSP 'NSP +`#define SAVE_CSP movq %rsp, P_CSP(P) +#define RESTORE_CSP movq 
P_CSP(P), %rsp' + +`#define AMD64_SIMULATE_NSP 'SIMULATE_NSP + +/* + * Context switching macros. + */ +`#define SWITCH_C_TO_ERLANG_QUICK \ + SAVE_CSP; \ + movq P_NSP(P), NSP' + +`#define SWITCH_ERLANG_TO_C_QUICK \ + movq NSP, P_NSP(P); \ + RESTORE_CSP' + +`#define SAVE_CACHED_STATE \ + SAVE_HP; \ + SAVE_FCALLS' + +`#define RESTORE_CACHED_STATE \ + RESTORE_HP; \ + RESTORE_HEAP_LIMIT; \ + RESTORE_FCALLS' + +`#define SWITCH_C_TO_ERLANG \ + RESTORE_CACHED_STATE; \ + SWITCH_C_TO_ERLANG_QUICK' + +`#define SWITCH_ERLANG_TO_C \ + SAVE_CACHED_STATE; \ + SWITCH_ERLANG_TO_C_QUICK' + +/* + * Argument (parameter) registers. + */ +`#define AMD64_NR_ARG_REGS 'NR_ARG_REGS +`#define NR_ARG_REGS 'NR_ARG_REGS + +define(defarg,`define(ARG$1,`$2')dnl +#`define ARG'$1 $2' +)dnl + +ifelse(eval(NR_ARG_REGS >= 1),0,, +`defarg(0,`%rsi')')dnl +ifelse(eval(NR_ARG_REGS >= 2),0,, +`defarg(1,`%rdx')')dnl +ifelse(eval(NR_ARG_REGS >= 3),0,, +`defarg(2,`%rcx')')dnl +ifelse(eval(NR_ARG_REGS >= 4),0,, +`defarg(3,`%r8')')dnl +ifelse(eval(NR_ARG_REGS >= 5),0,, +`defarg(4,`%r9')')dnl +ifelse(eval(NR_ARG_REGS >= 6),0,, +`defarg(5,`%rdi')')dnl + +/* + * TEMP_RV: + * Used in nbif_stack_trap_ra to preserve the return value. + * Must be a C callee-save register. + * Must be otherwise unused in the return path. 
+ */ +`#define TEMP_RV %rbx' + +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +dnl X X +dnl X hipe_amd64_glue.S support X +dnl X X +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + +dnl +dnl LOAD_ARG_REGS +dnl (identical to x86 version except for movq) +dnl +define(LAR_1,`movq P_ARG$1(P), ARG$1 ; ')dnl +define(LAR_N,`ifelse(eval($1 >= 0),0,,`LAR_N(eval($1-1))LAR_1($1)')')dnl +define(LOAD_ARG_REGS,`LAR_N(eval(NR_ARG_REGS-1))')dnl +`#define LOAD_ARG_REGS 'LOAD_ARG_REGS + +dnl +dnl STORE_ARG_REGS +dnl (identical to x86 version except for movq) +dnl +define(SAR_1,`movq ARG$1, P_ARG$1(P) ; ')dnl +define(SAR_N,`ifelse(eval($1 >= 0),0,,`SAR_N(eval($1-1))SAR_1($1)')')dnl +define(STORE_ARG_REGS,`SAR_N(eval(NR_ARG_REGS-1))')dnl +`#define STORE_ARG_REGS 'STORE_ARG_REGS + +dnl +dnl NSP_CALL(FUN) +dnl Emit a CALL FUN instruction, or simulate it. +dnl FUN must not be an NSP-based memory operand. +dnl +ifelse(eval(SIMULATE_NSP),0, +``#define NSP_CALL(FUN) call FUN'', +``#define NSP_CALL(FUN) subq $8,NSP; leaq 1f(%rip),%rax; movq %rax,(NSP); jmp FUN; 1:'')dnl + +dnl +dnl NSP_RETN(NPOP) +dnl Emit a RET $NPOP instruction, or simulate it. +dnl NPOP should be non-zero. +dnl +ifelse(eval(SIMULATE_NSP),0, +``#define NSP_RETN(NPOP) ret $NPOP'', +``#define NSP_RETN(NPOP) movq (NSP),TEMP_RV; addq $8+NPOP,NSP; jmp *TEMP_RV'')dnl + +dnl +dnl NSP_RET0 +dnl Emit a RET instruction, or simulate it. +dnl +ifelse(eval(SIMULATE_NSP),0, +``#define NSP_RET0 ret'', +``#define NSP_RET0 movq (NSP),TEMP_RV; addq $8,NSP; jmp *TEMP_RV'')dnl + +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +dnl X X +dnl X hipe_amd64_bifs.m4 support X +dnl X X +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + +dnl +dnl NBIF_ARG(DST,ARITY,ARGNO) +dnl Access a formal parameter. +dnl It will be a memory load via NSP when ARGNO >= NR_ARG_REGS. 
+dnl It will be a register move when 0 <= ARGNO < NR_ARG_REGS; if +dnl the source and destination are the same, the move is suppressed. +dnl +dnl This must be called before SWITCH_ERLANG_TO_C{,QUICK}. +dnl This must not be called if the C BIF's arity > 6. +dnl +define(NBIF_MOVE_REG,`ifelse($1,$2,`# movq $2, $1',`movq $2, $1')')dnl +define(NBIF_REG_ARG,`NBIF_MOVE_REG($1,ARG$2)')dnl +define(NBIF_STK_LOAD,`movq $2(NSP), $1')dnl +define(NBIF_STK_ARG,`NBIF_STK_LOAD($1,eval(8*($2-$3)))')dnl +define(NBIF_ARG,`ifelse(eval($3 >= NR_ARG_REGS),0,`NBIF_REG_ARG($1,$3)',`NBIF_STK_ARG($1,$2,$3)')')dnl +`/* #define NBIF_ARG_1_0 'NBIF_ARG(%rsi,1,0)` */' +`/* #define NBIF_ARG_2_0 'NBIF_ARG(%rsi,2,0)` */' +`/* #define NBIF_ARG_2_1 'NBIF_ARG(%rdx,2,1)` */' +`/* #define NBIF_ARG_3_0 'NBIF_ARG(%rsi,3,0)` */' +`/* #define NBIF_ARG_3_1 'NBIF_ARG(%rdx,3,1)` */' +`/* #define NBIF_ARG_3_2 'NBIF_ARG(%rcx,3,2)` */' +`/* #define NBIF_ARG_5_0 'NBIF_ARG(%rsi,5,0)` */' +`/* #define NBIF_ARG_5_1 'NBIF_ARG(%rdx,5,1)` */' +`/* #define NBIF_ARG_5_2 'NBIF_ARG(%rcx,5,2)` */' +`/* #define NBIF_ARG_5_3 'NBIF_ARG(%r8,5,3)` */' +`/* #define NBIF_ARG_5_4 'NBIF_ARG(%r9,5,4)` */' + +dnl XXX: For >6 arity C BIFs, we need: +dnl NBIF_COPY_NSP(ARITY) +dnl SWITCH_ERLANG_TO_C +dnl NBIF_GE6_ARG_MOVE(DSTREG,ARITY,ARGNO) +dnl pushq NBIF_GE6_ARG_OPND(ARITY,ARGNO) <-- uses NSP copied above + +dnl +dnl NBIF_RET(ARITY) +dnl Generates a return from a native BIF, taking care to pop +dnl any stacked formal parameters. 
+dnl +define(RET_POP,`ifelse(eval($1 > NR_ARG_REGS),0,0,eval(8*($1 - NR_ARG_REGS)))')dnl +define(NBIF_RET_N,`ifelse(eval($1),0,`NSP_RET0',`NSP_RETN($1)')')dnl +define(NBIF_RET,`NBIF_RET_N(eval(RET_POP($1)))')dnl +`/* #define NBIF_RET_0 'NBIF_RET(0)` */' +`/* #define NBIF_RET_1 'NBIF_RET(1)` */' +`/* #define NBIF_RET_2 'NBIF_RET(2)` */' +`/* #define NBIF_RET_3 'NBIF_RET(3)` */' +`/* #define NBIF_RET_5 'NBIF_RET(5)` */' + +`#endif /* HIPE_AMD64_ASM_H */' diff --git a/erts/emulator/hipe/hipe_amd64_bifs.m4 b/erts/emulator/hipe/hipe_amd64_bifs.m4 new file mode 100644 index 0000000000..66fd167f47 --- /dev/null +++ b/erts/emulator/hipe/hipe_amd64_bifs.m4 @@ -0,0 +1,555 @@ +changecom(`/*', `*/')dnl +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* + * $Id$ + */ + +include(`hipe/hipe_amd64_asm.m4') +#`include' "hipe_literals.h" + +`#if THE_NON_VALUE == 0 +#define TEST_GOT_EXN testq %rax, %rax +#else +#define TEST_GOT_EXN cmpq $THE_NON_VALUE, %rax +#endif' + +`#define TEST_GOT_MBUF movq P_MBUF(P), %rdx; testq %rdx, %rdx; jnz 3f; 2: +#define JOIN3(A,B,C) A##B##C +#define HANDLE_GOT_MBUF(ARITY) 3: call JOIN3(nbif_,ARITY,_gc_after_bif); jmp 2b' + +/* + * standard_bif_interface_1(nbif_name, cbif_name) + * standard_bif_interface_2(nbif_name, cbif_name) + * standard_bif_interface_3(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 1-3 parameters and + * standard failure mode. + */ +define(standard_bif_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + NBIF_ARG(%rsi,1,0) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C + call $2 + TEST_GOT_MBUF + SWITCH_C_TO_ERLANG + + /* throw exception if failure, otherwise return */ + TEST_GOT_EXN + jz nbif_1_simple_exception + NBIF_RET(1) + HANDLE_GOT_MBUF(1) + .size $1,.-$1 + .type $1,@function +#endif') + +define(standard_bif_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + NBIF_ARG(%rsi,2,0) + NBIF_ARG(%rdx,2,1) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C + call $2 + TEST_GOT_MBUF + SWITCH_C_TO_ERLANG + + /* throw exception if failure, otherwise return */ + TEST_GOT_EXN + jz nbif_2_simple_exception + NBIF_RET(2) + HANDLE_GOT_MBUF(2) + .size $1,.-$1 + .type $1,@function +#endif') + +define(standard_bif_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + NBIF_ARG(%rsi,3,0) + NBIF_ARG(%rdx,3,1) + NBIF_ARG(%rcx,3,2) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C + call $2 + TEST_GOT_MBUF + 
SWITCH_C_TO_ERLANG + + /* throw exception if failure, otherwise return */ + TEST_GOT_EXN + jz nbif_3_simple_exception + NBIF_RET(3) + HANDLE_GOT_MBUF(3) + .size $1,.-$1 + .type $1,@function +#endif') + +/* + * fail_bif_interface_0(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 0 parameters and + * standard failure mode. + */ +define(fail_bif_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C + call $2 + TEST_GOT_MBUF + SWITCH_C_TO_ERLANG + + /* throw exception if failure, otherwise return */ + TEST_GOT_EXN + jz nbif_0_simple_exception + NBIF_RET(0) + HANDLE_GOT_MBUF(0) + .size $1,.-$1 + .type $1,@function +#endif') + +/* + * nofail_primop_interface_0(nbif_name, cbif_name) + * nofail_primop_interface_1(nbif_name, cbif_name) + * nofail_primop_interface_2(nbif_name, cbif_name) + * nofail_primop_interface_3(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 0-3 ordinary parameters and no failure mode. + * Also used for guard BIFs. 
+ */ +define(nofail_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C + call $2 + TEST_GOT_MBUF + SWITCH_C_TO_ERLANG + + /* return */ + NBIF_RET(0) + HANDLE_GOT_MBUF(0) + .size $1,.-$1 + .type $1,@function +#endif') + +define(nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + NBIF_ARG(%rsi,1,0) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C + call $2 + TEST_GOT_MBUF + SWITCH_C_TO_ERLANG + + /* return */ + NBIF_RET(1) + HANDLE_GOT_MBUF(1) + .size $1,.-$1 + .type $1,@function +#endif') + +define(nofail_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + NBIF_ARG(%rsi,2,0) + NBIF_ARG(%rdx,2,1) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C + call $2 + TEST_GOT_MBUF + SWITCH_C_TO_ERLANG + + /* return */ + NBIF_RET(2) + HANDLE_GOT_MBUF(2) + .size $1,.-$1 + .type $1,@function +#endif') + +define(nofail_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + NBIF_ARG(%rsi,3,0) + NBIF_ARG(%rdx,3,1) + NBIF_ARG(%rcx,3,2) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C + call $2 + TEST_GOT_MBUF + SWITCH_C_TO_ERLANG + + /* return */ + NBIF_RET(3) + HANDLE_GOT_MBUF(3) + .size $1,.-$1 + .type $1,@function +#endif') + +/* + * nocons_nofail_primop_interface_0(nbif_name, cbif_name) + * nocons_nofail_primop_interface_1(nbif_name, cbif_name) + * nocons_nofail_primop_interface_2(nbif_name, cbif_name) + * nocons_nofail_primop_interface_3(nbif_name, cbif_name) + * nocons_nofail_primop_interface_5(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * 
parameter, 0-3 or 5 ordinary parameters, and no failure mode. + * The primop cannot CONS or gc. + */ +define(nocons_nofail_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C_QUICK + call $2 + SWITCH_C_TO_ERLANG_QUICK + + /* return */ + NBIF_RET(0) + .size $1,.-$1 + .type $1,@function +#endif') + +define(nocons_nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + NBIF_ARG(%rsi,1,0) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C_QUICK + call $2 + SWITCH_C_TO_ERLANG_QUICK + + /* return */ + NBIF_RET(1) + .size $1,.-$1 + .type $1,@function +#endif') + +define(nocons_nofail_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + NBIF_ARG(%rsi,2,0) + NBIF_ARG(%rdx,2,1) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C_QUICK + call $2 + SWITCH_C_TO_ERLANG_QUICK + + /* return */ + NBIF_RET(2) + .size $1,.-$1 + .type $1,@function +#endif') + +define(nocons_nofail_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + NBIF_ARG(%rsi,3,0) + NBIF_ARG(%rdx,3,1) + NBIF_ARG(%rcx,3,2) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C_QUICK + call $2 + SWITCH_C_TO_ERLANG_QUICK + + /* return */ + NBIF_RET(3) + .size $1,.-$1 + .type $1,@function +#endif') + +define(nocons_nofail_primop_interface_5, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + movq P, %rdi + NBIF_ARG(%rsi,5,0) + NBIF_ARG(%rdx,5,1) + NBIF_ARG(%rcx,5,2) + NBIF_ARG(%r8,5,3) + NBIF_ARG(%r9,5,4) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C_QUICK + 
call $2 + SWITCH_C_TO_ERLANG_QUICK + + /* return */ + NBIF_RET(5) + .size $1,.-$1 + .type $1,@function +#endif') + +/* + * noproc_primop_interface_0(nbif_name, cbif_name) + * noproc_primop_interface_1(nbif_name, cbif_name) + * noproc_primop_interface_2(nbif_name, cbif_name) + * noproc_primop_interface_3(nbif_name, cbif_name) + * noproc_primop_interface_5(nbif_name, cbif_name) + * + * Generate native interface for a primop with no implicit P + * parameter, 0-3 or 5 ordinary parameters, and no failure mode. + * The primop cannot CONS or gc. + */ +define(noproc_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C_QUICK + call $2 + SWITCH_C_TO_ERLANG_QUICK + + /* return */ + NBIF_RET(0) + .size $1,.-$1 + .type $1,@function +#endif') + +define(noproc_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + NBIF_ARG(%rdi,1,0) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C_QUICK + call $2 + SWITCH_C_TO_ERLANG_QUICK + + /* return */ + NBIF_RET(1) + .size $1,.-$1 + .type $1,@function +#endif') + +define(noproc_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + NBIF_ARG(%rdi,2,0) + NBIF_ARG(%rsi,2,1) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C_QUICK + call $2 + SWITCH_C_TO_ERLANG_QUICK + + /* return */ + NBIF_RET(2) + .size $1,.-$1 + .type $1,@function +#endif') + +define(noproc_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + NBIF_ARG(%rdi,3,0) + NBIF_ARG(%rsi,3,1) + NBIF_ARG(%rdx,3,2) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C_QUICK + call $2 + SWITCH_C_TO_ERLANG_QUICK + + /* return */ + NBIF_RET(3) + .size $1,.-$1 + .type $1,@function +#endif') + 
+define(noproc_primop_interface_5, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .section ".text" + .align 4 + .global $1 +$1: + /* set up the parameters */ + NBIF_ARG(%rdi,5,0) + NBIF_ARG(%rsi,5,1) + NBIF_ARG(%rdx,5,2) + NBIF_ARG(%rcx,5,3) + NBIF_ARG(%r8,5,4) + + /* make the call on the C stack */ + SWITCH_ERLANG_TO_C_QUICK + call $2 + SWITCH_C_TO_ERLANG_QUICK + + /* return */ + NBIF_RET(5) + .size $1,.-$1 + .type $1,@function +#endif') + +/* + * AMD64-specific primops. + */ +noproc_primop_interface_0(nbif_handle_fp_exception, erts_restore_fpu) + +/* + * Implement gc_bif_interface_0 as nofail_primop_interface_0. + */ +define(gc_bif_interface_0,`nofail_primop_interface_0($1, $2)') + +/* + * Implement gc_bif_interface_N as standard_bif_interface_N (N=1,2). + */ +define(gc_bif_interface_1,`standard_bif_interface_1($1, $2)') +define(gc_bif_interface_2,`standard_bif_interface_2($1, $2)') + +/* + * Implement gc_nofail_primop_interface_1 as nofail_primop_interface_1. + */ +define(gc_nofail_primop_interface_1,`nofail_primop_interface_1($1, $2)') + +include(`hipe/hipe_bif_list.m4') + +`#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif' diff --git a/erts/emulator/hipe/hipe_amd64_gc.h b/erts/emulator/hipe/hipe_amd64_gc.h new file mode 100644 index 0000000000..56650901d6 --- /dev/null +++ b/erts/emulator/hipe/hipe_amd64_gc.h @@ -0,0 +1,30 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * Stack walking helpers for native stack GC procedures. + */ +#ifndef HIPE_AMD64_GC_H +#define HIPE_AMD64_GC_H + +#include "hipe_amd64_asm.h" /* for NR_ARG_REGS */ + +#define HIPE_X86_ASM_H +#include "hipe_x86_gc.h" + +#endif /* HIPE_AMD64_GC_H */ diff --git a/erts/emulator/hipe/hipe_amd64_glue.S b/erts/emulator/hipe/hipe_amd64_glue.S new file mode 100644 index 0000000000..872c5dc9e3 --- /dev/null +++ b/erts/emulator/hipe/hipe_amd64_glue.S @@ -0,0 +1,443 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * $Id$ + */ + +#include "hipe_amd64_asm.h" +#include "hipe_literals.h" +#define ASM +#include "hipe_mode_switch.h" + +/* + * Note: the mode-switch entry points in hipe_amd64_glue.S have + * the same names as in hipe_x86_glue.S. This is intentional, + * as it allows using hipe_x86_glue.h with AMD64. + */ + +/* + * Set up frame on C stack, + * save C callee-save registers, + * retrieve the process pointer from the parameters from C, + * SWITCH_C_TO_ERLANG. + * + * The end of the frame must be 16-byte aligned, otherwise + * calls to C may break. 
%rsp+8 is 16-byte aligned on entry, + * and six registers are to be saved, so a seventh word is + * added to make the resulting %rsp 16-byte aligned. + */ +#define ENTER_FROM_C \ + /* save C callee-save registers on the C stack */ \ + subq $(7*8), %rsp; \ + movq %r15, 40(%rsp); \ + movq %r14, 32(%rsp); \ + movq %r13, 24(%rsp); \ + movq %r12, 16(%rsp); \ + movq %rbx, 8(%rsp); \ + movq %rbp, (%rsp); \ + /* get the process pointer */ \ + movq %rdi, P; \ + /* switch to native stack */ \ + SWITCH_C_TO_ERLANG + + .section ".text" + +/* + * int x86_call_to_native(Process *p); + * Emulated code recursively calls native code. + */ + .align 4 + .global x86_call_to_native + .global nbif_return +x86_call_to_native: + ENTER_FROM_C + /* get argument registers */ + LOAD_ARG_REGS + /* call the target */ + NSP_CALL(*P_NCALLEE(P)) +/* + * We export this return address so that hipe_mode_switch() can discover + * when native code tailcalls emulated code. + * + * This is where native code returns to emulated code. + */ +nbif_return: + movq %rax, P_ARG0(P) # save retval + movl $HIPE_MODE_SWITCH_RES_RETURN, %eax +/* FALLTHROUGH to .flush_exit + * + * Return to the calling C function with result token in %eax. 
+ * + * .nosave_exit saves no state + * .flush_exit saves cached P state + * .suspend_exit also saves RA + */ +.suspend_exit: + /* save RA, no-op on x86 */ +.flush_exit: + /* flush cached P state */ + SAVE_CACHED_STATE +.nosave_exit: + /* switch to C stack */ + SWITCH_ERLANG_TO_C_QUICK + /* restore C callee-save registers, drop frame, return */ + movq (%rsp), %rbp # kills P + movq 8(%rsp), %rbx + movq 16(%rsp), %r12 + movq 24(%rsp), %r13 + movq 32(%rsp), %r14 + movq 40(%rsp), %r15 # kills HP + addq $(7*8), %rsp + ret + +/* + * Native code calls emulated code via a linker-generated + * stub (hipe_x86_loader.erl) which should look as follows: + * + * stub for f/N: + * movq $<f's BEAM code address>, P_BEAM_IP(P) + * movb $<N>, P_ARITY(P) + * jmp nbif_callemu + * + * XXX: Different stubs for different number of register parameters? + */ + .align 4 + .global nbif_callemu +nbif_callemu: + STORE_ARG_REGS + movl $HIPE_MODE_SWITCH_RES_CALL, %eax + jmp .suspend_exit + +/* + * nbif_apply + */ + .align 4 + .global nbif_apply +nbif_apply: + STORE_ARG_REGS + movl $HIPE_MODE_SWITCH_RES_APPLY, %eax + jmp .suspend_exit + +/* + * Native code calls an emulated-mode closure via a stub defined below. + * + * The closure is appended as the last actual parameter, and parameters + * beyond the first few passed in registers are pushed onto the stack in + * left-to-right order. + * Hence, the location of the closure parameter only depends on the number + * of parameters in registers, not the total number of parameters. 
+ */ +#if NR_ARG_REGS >= 6 + .align 4 + .global nbif_ccallemu6 +nbif_ccallemu6: + movq ARG5, P_ARG5(P) +#if NR_ARG_REGS > 6 + movq ARG6, ARG5 +#else + movq 8(NSP), ARG5 +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 5 + .align 4 + .global nbif_ccallemu5 +nbif_ccallemu5: + movq ARG4, P_ARG4(P) +#if NR_ARG_REGS > 5 + movq ARG5, ARG4 +#else + movq 8(NSP), ARG4 +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 4 + .align 4 + .global nbif_ccallemu4 +nbif_ccallemu4: + movq ARG3, P_ARG3(P) +#if NR_ARG_REGS > 4 + movq ARG4, ARG3 +#else + movq 8(NSP), ARG3 +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 3 + .align 4 + .global nbif_ccallemu3 +nbif_ccallemu3: + movq ARG2, P_ARG2(P) +#if NR_ARG_REGS > 3 + movq ARG3, ARG2 +#else + movq 8(NSP), ARG2 +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 2 + .align 4 + .global nbif_ccallemu2 +nbif_ccallemu2: + movq ARG1, P_ARG1(P) +#if NR_ARG_REGS > 2 + movq ARG2, ARG1 +#else + movq 8(NSP), ARG1 +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 1 + .align 4 + .global nbif_ccallemu1 +nbif_ccallemu1: + movq ARG0, P_ARG0(P) +#if NR_ARG_REGS > 1 + movq ARG1, ARG0 +#else + movq 8(NSP), ARG0 +#endif + /*FALLTHROUGH*/ +#endif + + .align 4 + .global nbif_ccallemu0 +nbif_ccallemu0: + /* We use %rsi not ARG0 here because ARG0 is not + defined when NR_ARG_REGS == 0. */ +#if NR_ARG_REGS == 0 + movq 8(NSP), %rsi +#endif + movq %rsi, P_CLOSURE(P) + movl $HIPE_MODE_SWITCH_RES_CALL_CLOSURE, %eax + jmp .suspend_exit + +/* + * This is where native code suspends. 
+ */ + .align 4 + .global nbif_suspend_0 +nbif_suspend_0: + movl $HIPE_MODE_SWITCH_RES_SUSPEND, %eax + jmp .suspend_exit + +/* + * Suspend from a receive (waiting for a message) + */ + .align 4 + .global nbif_suspend_msg +nbif_suspend_msg: + movl $HIPE_MODE_SWITCH_RES_WAIT, %eax + jmp .suspend_exit + +/* + * Suspend from a receive with a timeout (waiting for a message) + * if (!(p->flags & F_TIMO)) { suspend } + * else { return 0; } + */ + .align 4 + .global nbif_suspend_msg_timeout +nbif_suspend_msg_timeout: + movq P_FLAGS(P), %rax + /* this relies on F_TIMO (1<<2) fitting in a byte */ + testb $F_TIMO, %al # F_TIMO set? + jz .no_timeout # if not set, suspend + /* timeout has occurred */ + xorl %eax, %eax # return 0 to signal timeout + NSP_RET0 +.no_timeout: + movl $HIPE_MODE_SWITCH_RES_WAIT_TIMEOUT, %eax + jmp .suspend_exit + +/* + * int x86_return_to_native(Process *p); + * Emulated code returns to its native code caller. + */ + .align 4 + .global x86_return_to_native +x86_return_to_native: + ENTER_FROM_C + /* get return value */ + movq P_ARG0(P), %rax + /* + * Return using the stacked return address. + * The parameters were popped at the original native-to-emulated + * call (hipe_call_from_native_is_recursive), so a plain ret suffices. + */ + NSP_RET0 + +/* + * int x86_tailcall_to_native(Process *p); + * Emulated code tailcalls native code. + */ + .align 4 + .global x86_tailcall_to_native +x86_tailcall_to_native: + ENTER_FROM_C + /* get argument registers */ + LOAD_ARG_REGS + /* jump to the target label */ + jmp *P_NCALLEE(P) + +/* + * int x86_throw_to_native(Process *p); + * Emulated code throws an exception to its native code caller. + */ + .align 4 + .global x86_throw_to_native +x86_throw_to_native: + ENTER_FROM_C + /* invoke the handler */ + jmp *P_NCALLEE(P) # set by hipe_find_handler() + +/* + * This is the default exception handler for native code. 
+ */ + .align 4 + .global nbif_fail +nbif_fail: + movl $HIPE_MODE_SWITCH_RES_THROW, %eax + jmp .flush_exit + + .global nbif_0_gc_after_bif + .global nbif_1_gc_after_bif + .global nbif_2_gc_after_bif + .global nbif_3_gc_after_bif + .align 4 +nbif_0_gc_after_bif: + xorl %edx, %edx + jmp .gc_after_bif + .align 4 +nbif_1_gc_after_bif: + movl $1, %edx + jmp .gc_after_bif + .align 4 +nbif_2_gc_after_bif: + movl $2, %edx + jmp .gc_after_bif + .align 4 +nbif_3_gc_after_bif: + movl $3, %edx + /*FALLTHROUGH*/ + .align 4 +.gc_after_bif: + movl %edx, P_NARITY(P) # Note: narity is a 32-bit field + subq $(16-8), %rsp + movq P, %rdi + movq %rax, %rsi + call erts_gc_after_bif_call + addq $(16-8), %rsp + movl $0, P_NARITY(P) # Note: narity is a 32-bit field + ret + +/* + * We end up here when a BIF called from native signals an + * exceptional condition. + * The stack/heap registers were just read from P. + */ + .global nbif_0_simple_exception + .global nbif_1_simple_exception + .global nbif_2_simple_exception + .global nbif_3_simple_exception + .align 4 +nbif_0_simple_exception: + xorl %eax, %eax + jmp .nbif_simple_exception + .align 4 +nbif_1_simple_exception: + movl $1, %eax + jmp .nbif_simple_exception + .align 4 +nbif_2_simple_exception: + movl $2, %eax + jmp .nbif_simple_exception + .align 4 +nbif_3_simple_exception: + movl $3, %eax + /*FALLTHROUGH*/ + .align 4 +.nbif_simple_exception: + cmpq $FREASON_TRAP, P_FREASON(P) + je .handle_trap + /* + * Find and invoke catch handler (it must exist). + * The stack/heap registers were just read from P. + * - %eax should contain the current call's arity + */ + movl %eax, P_NARITY(P) # Note: narity is a 32-bit field + /* find and prepare to invoke the handler */ + SWITCH_ERLANG_TO_C_QUICK # The cached state is clean and need not be saved. 
+ movq P, %rdi + call hipe_handle_exception # Note: hipe_handle_exception() conses + SWITCH_C_TO_ERLANG # %rsp updated by hipe_find_handler() + /* now invoke the handler */ + jmp *P_NCALLEE(P) # set by hipe_find_handler() + + /* + * A BIF failed with freason TRAP: + * - the BIF's arity is in %eax + * - the native heap/stack/reds registers are saved in P + * Record the arity in P with a 32-bit store (a 64-bit store would + * overwrite the 4 bytes following narity), then leave through + * .nosave_exit with the TRAP result token. + */ +.handle_trap: + movl %eax, P_NARITY(P) # Note: narity is a 32-bit field + movl $HIPE_MODE_SWITCH_RES_TRAP, %eax + jmp .nosave_exit + +/* + * nbif_stack_trap_ra: trap return address for maintaining + * the gray/white stack boundary + */ + .global nbif_stack_trap_ra + .align 4 +nbif_stack_trap_ra: # a return address, not a function + # This only handles a single return value. + # If we have more, we need to save them in the PCB. + movq %rax, TEMP_RV # save retval + SWITCH_ERLANG_TO_C_QUICK + movq P, %rdi + call hipe_handle_stack_trap # must not cons; preserves TEMP_RV + movq %rax, %rdx # original RA + SWITCH_C_TO_ERLANG_QUICK + movq TEMP_RV, %rax # restore retval + jmp *%rdx # resume at original RA + +/* + * nbif_inc_stack_0 + */ + .global nbif_inc_stack_0 + .align 4 +nbif_inc_stack_0: + SWITCH_ERLANG_TO_C_QUICK + STORE_ARG_REGS + movq P, %rdi + # hipe_inc_nstack reads and writes NSP and NSP_LIMIT, + # but does not access HP or FCALLS (or the non-amd64 NRA). + call hipe_inc_nstack + LOAD_ARG_REGS + SWITCH_C_TO_ERLANG_QUICK + NSP_RET0 + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/erts/emulator/hipe/hipe_amd64_glue.h b/erts/emulator/hipe/hipe_amd64_glue.h new file mode 100644 index 0000000000..c92eb842cb --- /dev/null +++ b/erts/emulator/hipe/hipe_amd64_glue.h @@ -0,0 +1,30 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License.
You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * $Id$ + */ +#ifndef HIPE_AMD64_GLUE_H +#define HIPE_AMD64_GLUE_H + +#include "hipe_amd64_asm.h" /* for NR_ARG_REGS */ + +#define HIPE_X86_ASM_H +#include "hipe_x86_glue.h" + +#endif /* HIPE_AMD64_GLUE_H */ diff --git a/erts/emulator/hipe/hipe_amd64_primops.h b/erts/emulator/hipe/hipe_amd64_primops.h new file mode 100644 index 0000000000..dcfa8be92a --- /dev/null +++ b/erts/emulator/hipe/hipe_amd64_primops.h @@ -0,0 +1,23 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + */ +PRIMOP_LIST(am_inc_stack_0, &nbif_inc_stack_0) +PRIMOP_LIST(am_handle_fp_exception, &nbif_handle_fp_exception) +PRIMOP_LIST(am_sse2_fnegate_mask, &sse2_fnegate_mask) diff --git a/erts/emulator/hipe/hipe_arch.h b/erts/emulator/hipe/hipe_arch.h new file mode 100644 index 0000000000..7803543ef1 --- /dev/null +++ b/erts/emulator/hipe/hipe_arch.h @@ -0,0 +1,54 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2003-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_ARCH_H +#define HIPE_ARCH_H + +extern const void *hipe_arch_primop_address(Eterm key); + +/* used by beam_load.c:patch(). 
patchtype == am_load_fe, Value is an ErlFunEntry* */ +extern void hipe_patch_address(Uint *address, Eterm patchtype, Uint value); +extern void hipe_patch_load_fe(Uint *address, Uint value); +extern int hipe_patch_insn(void *address, Uint value, Eterm type); +extern int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline); + +extern void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity); + +#if defined(__sparc__) +#include "hipe_sparc.h" +#endif +#if defined(__i386__) +#include "hipe_x86.h" +#endif +#if defined(__x86_64__) +#include "hipe_amd64.h" +#endif +#if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#include "hipe_ppc.h" +#endif +#if defined(__arm__) +#include "hipe_arm.h" +#endif + +#if !defined(AEXTERN) +#define AEXTERN(RET,NAME,PROTO) extern RET NAME PROTO +#endif + +#endif /* HIPE_ARCH_H */ diff --git a/erts/emulator/hipe/hipe_arm.c b/erts/emulator/hipe/hipe_arm.c new file mode 100644 index 0000000000..b70b32947b --- /dev/null +++ b/erts/emulator/hipe/hipe_arm.c @@ -0,0 +1,401 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + */ +#include <stddef.h> /* offsetof() */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "global.h" +#include "erl_binary.h" +#include <sys/mman.h> + +#include "hipe_arch.h" +#include "hipe_native_bif.h" /* nbif_callemu() */ +#include "hipe_bif0.h" + +/* Flush dcache and invalidate icache for a range of addresses. */ +void hipe_flush_icache_range(void *address, unsigned int nbytes) +{ +#if defined(__ARM_EABI__) + register unsigned long beg __asm__("r0") = (unsigned long)address; + register unsigned long end __asm__("r1") = (unsigned long)address + nbytes; + register unsigned long flg __asm__("r2") = 0; + register unsigned long scno __asm__("r7") = 0xf0002; + __asm__ __volatile__("swi 0" /* sys_cacheflush() */ + : "=r"(beg) + : "0"(beg), "r"(end), "r"(flg), "r"(scno)); +#else + register unsigned long beg __asm__("r0") = (unsigned long)address; + register unsigned long end __asm__("r1") = (unsigned long)address + nbytes; + register unsigned long flg __asm__("r2") = 0; + __asm__ __volatile__("swi 0x9f0002" /* sys_cacheflush() */ + : "=r"(beg) + : "0"(beg), "r"(end), "r"(flg)); +#endif +} + +void hipe_flush_icache_word(void *address) +{ + hipe_flush_icache_range(address, 4); +} + +/* + * Management of 32MB code segments for regular code and trampolines. + */ + +#define SEGMENT_NRBYTES (32*1024*1024) /* named constant, _not_ a tunable */ + +static struct segment { + unsigned int *base; /* [base,base+32MB[ */ + unsigned int *code_pos; /* INV: base <= code_pos <= tramp_pos */ + unsigned int *tramp_pos; /* INV: tramp_pos <= base+32MB */ + /* On ARM we always allocate a trampoline at base+32MB-8 for + nbif_callemu, so tramp_pos <= base+32MB-8. 
*/ +} curseg; + +#define in_area(ptr,start,nbytes) \ + ((unsigned long)((char*)(ptr) - (char*)(start)) < (nbytes)) + +static void *new_code_mapping(void) +{ + return mmap(0, SEGMENT_NRBYTES, + PROT_EXEC|PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); +} + +static int check_callees(Eterm callees) +{ + Eterm *tuple; + Uint arity; + Uint i; + + if (is_not_tuple(callees)) + return -1; + tuple = tuple_val(callees); + arity = arityval(tuple[0]); + for (i = 1; i <= arity; ++i) { + Eterm mfa = tuple[i]; + if (is_atom(mfa)) + continue; + if (is_not_tuple(mfa) || + tuple_val(mfa)[0] != make_arityval(3) || + is_not_atom(tuple_val(mfa)[1]) || + is_not_atom(tuple_val(mfa)[2]) || + is_not_small(tuple_val(mfa)[3]) || + unsigned_val(tuple_val(mfa)[3]) > 255) + return -1; + } + return arity; +} + +static unsigned int *try_alloc(Uint nrwords, int nrcallees, Eterm callees, unsigned int **trampvec) +{ + unsigned int *base, *address, *tramp_pos, nrfreewords; + int trampnr; + Eterm mfa, m, f; + unsigned int a, *trampoline; + + m = NIL; f = NIL; a = 0; /* silence stupid compiler warning */ + tramp_pos = curseg.tramp_pos; + address = curseg.code_pos; + nrfreewords = tramp_pos - address; + if (nrwords > nrfreewords) + return NULL; + curseg.code_pos = address + nrwords; + nrfreewords -= nrwords; + + base = curseg.base; + for (trampnr = 1; trampnr <= nrcallees; ++trampnr) { + mfa = tuple_val(callees)[trampnr]; + if (is_atom(mfa)) + trampoline = hipe_primop_get_trampoline(mfa); + else { + m = tuple_val(mfa)[1]; + f = tuple_val(mfa)[2]; + a = unsigned_val(tuple_val(mfa)[3]); + trampoline = hipe_mfa_get_trampoline(m, f, a); + } + if (!in_area(trampoline, base, SEGMENT_NRBYTES)) { + if (nrfreewords < 2) + return NULL; + nrfreewords -= 2; + tramp_pos = trampoline = tramp_pos - 2; + trampoline[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */ + trampoline[1] = 0; /* callee's address */ + hipe_flush_icache_range(trampoline, 2*sizeof(int)); + if (is_atom(mfa)) + hipe_primop_set_trampoline(mfa, 
trampoline); + else + hipe_mfa_set_trampoline(m, f, a, trampoline); + } + trampvec[trampnr-1] = trampoline; + } + curseg.tramp_pos = tramp_pos; + return address; +} + +void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) +{ + Uint nrwords; + int nrcallees; + Eterm trampvecbin; + unsigned int **trampvec; + unsigned int *address; + unsigned int *base; + struct segment oldseg; + + if (nrbytes & 0x3) + return NULL; + nrwords = nrbytes >> 2; + + nrcallees = check_callees(callees); + if (nrcallees < 0) + return NULL; + trampvecbin = new_binary(p, NULL, nrcallees*sizeof(unsigned int*)); + trampvec = (unsigned int**)binary_bytes(trampvecbin); + + address = try_alloc(nrwords, nrcallees, callees, trampvec); + if (!address) { + base = new_code_mapping(); + if (base == MAP_FAILED) + return NULL; + oldseg = curseg; + curseg.base = base; + curseg.code_pos = base; + curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES); +#if defined(__arm__) + curseg.tramp_pos -= 2; + curseg.tramp_pos[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */ + curseg.tramp_pos[1] = (unsigned int)&nbif_callemu; +#endif + + address = try_alloc(nrwords, nrcallees, callees, trampvec); + if (!address) { + munmap(base, SEGMENT_NRBYTES); + curseg = oldseg; + return NULL; + } + /* commit to new segment, ignore leftover space in old segment */ + } + *trampolines = trampvecbin; + return address; +} + +static unsigned int *alloc_stub(Uint nrwords, unsigned int **tramp_callemu) +{ + unsigned int *address; + unsigned int *base; + struct segment oldseg; + + address = try_alloc(nrwords, 0, NIL, NULL); + if (!address) { + base = new_code_mapping(); + if (base == MAP_FAILED) + return NULL; + oldseg = curseg; + curseg.base = base; + curseg.code_pos = base; + curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES); +#if defined(__arm__) + curseg.tramp_pos -= 2; + curseg.tramp_pos[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */ + curseg.tramp_pos[1] = (unsigned int)&nbif_callemu; +#endif + 
+ address = try_alloc(nrwords, 0, NIL, NULL); + if (!address) { + munmap(base, SEGMENT_NRBYTES); + curseg = oldseg; + return NULL; + } + /* commit to new segment, ignore leftover space in old segment */ + } + *tramp_callemu = (unsigned int*)((char*)curseg.base + SEGMENT_NRBYTES) - 2; + return address; +} + +/* + * ARMv5's support for 32-bit immediates is effectively non-existent. + * Hence, every 32-bit immediate is stored in memory and loaded via + * a PC-relative addressing mode. Relocation entries refer to those + * data words, NOT the load instructions, so patching is trivial. + */ +static void patch_imm32(Uint32 *address, unsigned int imm32) +{ + *address = imm32; + hipe_flush_icache_word(address); +} + +void hipe_patch_load_fe(Uint32 *address, Uint value) +{ + patch_imm32(address, value); +} + +int hipe_patch_insn(void *address, Uint32 value, Eterm type) +{ + switch (type) { + case am_closure: + case am_constant: + case am_atom: + case am_c_const: + break; + default: + return -1; + } + patch_imm32((Uint32*)address, value); + return 0; +} + +/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2() + and hipe_bif0.c:hipe_make_stub() */ +void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity) +{ + unsigned int *code; +#if defined(__arm__) + unsigned int *tramp_callemu; + int callemu_offset; +#endif + + /* + * Native code calls BEAM via a stub looking as follows: + * + * mov r0, #beamArity + * ldr r8, [pc,#0] // beamAddress + * b nbif_callemu + * .long beamAddress + * + * I'm using r0 and r8 since they aren't used for + * parameter passing in native code. The branch to + * nbif_callemu may need to go via a trampoline. + * (Trampolines are allowed to modify r12, but they don't.) 
+ */ + +#if !defined(__arm__) + /* verify that 'ba' can reach nbif_callemu */ + if ((unsigned long)&nbif_callemu & ~0x01FFFFFCUL) + abort(); +#endif + +#if defined(__arm__) + code = alloc_stub(4, &tramp_callemu); + /* alloc_stub() returns NULL when no code segment can be mapped; + report the failure instead of writing the stub through NULL */ + if (!code) + return NULL; + callemu_offset = ((int)&nbif_callemu - ((int)&code[2] + 8)) >> 2; + if (!(callemu_offset >= -0x00800000 && callemu_offset <= 0x007FFFFF)) { + callemu_offset = ((int)tramp_callemu - ((int)&code[2] + 8)) >> 2; + if (!(callemu_offset >= -0x00800000 && callemu_offset <= 0x007FFFFF)) + abort(); + } +#else + code = alloc_stub(4, &trampoline); + if (!code) + return NULL; +#endif + +#if defined(__arm__) + /* mov r0, #beamArity */ + code[0] = 0xE3A00000 | (beamArity & 0xFF); + /* ldr r8, [pc,#0] // beamAddress */ + code[1] = 0xE59F8000; + /* b nbif_callemu */ + code[2] = 0xEA000000 | (callemu_offset & 0x00FFFFFF); + /* .long beamAddress */ + code[3] = (unsigned int)beamAddress; +#else + /* addi r12,0,beamAddress@l */ + code[0] = 0x39800000 | ((unsigned long)beamAddress & 0xFFFF); + /* addi r0,0,beamArity */ + code[1] = 0x38000000 | (beamArity & 0x7FFF); + /* addis r12,r12,beamAddress@ha */ + code[2] = 0x3D8C0000 | at_ha((unsigned long)beamAddress); + /* ba nbif_callemu */ + code[3] = 0x48000002 | (unsigned long)&nbif_callemu; +#endif + + hipe_flush_icache_range(code, 4*sizeof(int)); + + return code; +} + +static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA) +{ + Uint32 oldI = *address; +#if defined(__arm__) + Uint32 newI = (oldI & 0xFF000000) | (offset & 0x00FFFFFF); +#else + Uint32 newI = (oldI & 0xFC000001) | ((offset & 0x00FFFFFF) << 2) | (AA & 2); +#endif + *address = newI; + hipe_flush_icache_word(address); +} + +int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) +{ +#if !defined(__arm__) + if ((Uint32)destAddress == ((Uint32)destAddress & 0x01FFFFFC)) { + /* The destination is in the [0,32MB[ range. + We can reach it with a ba/bla instruction. + This is the typical case for BIFs and primops. + It's also common for trap-to-BEAM stubs (on ppc32).
*/ + patch_b((Uint32*)callAddress, (Uint32)destAddress >> 2, 2); + } else { +#endif +#if defined(__arm__) + Sint32 destOffset = ((Sint32)destAddress - ((Sint32)callAddress+8)) >> 2; +#else + Sint32 destOffset = ((Sint32)destAddress - (Sint32)callAddress) >> 2; +#endif + if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) { + /* The destination is within a [-32MB,+32MB[ range from us. + We can reach it with a b/bl instruction. + This is typical for nearby Erlang code. */ + patch_b((Uint32*)callAddress, destOffset, 0); + } else { + /* The destination is too distant for b/bl/ba/bla. + Must do a b/bl to the trampoline. */ +#if defined(__arm__) + Sint32 trampOffset = ((Sint32)trampoline - ((Sint32)callAddress+8)) >> 2; +#else + Sint32 trampOffset = ((Sint32)trampoline - (Sint32)callAddress) >> 2; +#endif + if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) { + /* Update the trampoline's address computation. + (May be redundant, but we can't tell.) */ +#if defined(__arm__) + patch_imm32((Uint32*)trampoline+1, (Uint32)destAddress); +#else + patch_li((Uint32*)trampoline, (Uint32)destAddress); +#endif + /* Update this call site. */ + patch_b((Uint32*)callAddress, trampOffset, 0); + } else + return -1; + } +#if !defined(__arm__) + } +#endif + return 0; +} + +void hipe_arch_print_pcb(struct hipe_process_state *p) +{ +#define U(n,x) \ + printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "") + U("nra ", nra); + U("narity ", narity); +#undef U +} diff --git a/erts/emulator/hipe/hipe_arm.h b/erts/emulator/hipe/hipe_arm.h new file mode 100644 index 0000000000..84f58a681f --- /dev/null +++ b/erts/emulator/hipe/hipe_arm.h @@ -0,0 +1,47 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. 
+ * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_ARM_H +#define HIPE_ARM_H + +extern void hipe_flush_icache_word(void *address); +extern void hipe_flush_icache_range(void *address, unsigned int nbytes); + +/* for stack descriptor hash lookup */ +#define HIPE_RA_LSR_COUNT 2 /* low 2 bits are always zero */ + +/* for hipe_bifs_{read,write}_{s,u}32 */ +static __inline__ int hipe_word32_address_ok(void *address) +{ + return ((unsigned long)address & 0x3) == 0; +} + +/* Native stack growth direction. */ +#define HIPE_NSTACK_GROWS_DOWN + +#define hipe_arch_name am_arm + +extern void hipe_arm_inc_stack(void); + +/* for hipe_bifs_enter_code_2 */ +extern void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p); +#define HIPE_ALLOC_CODE(n,c,t,p) hipe_alloc_code((n),(c),(t),(p)) + +#endif /* HIPE_ARM_H */ diff --git a/erts/emulator/hipe/hipe_arm.tab b/erts/emulator/hipe/hipe_arm.tab new file mode 100644 index 0000000000..81626796a7 --- /dev/null +++ b/erts/emulator/hipe/hipe_arm.tab @@ -0,0 +1,23 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2005-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. 
If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +# $Id$ +# ARM-specific atoms and bifs + +atom arm +atom inc_stack_0 diff --git a/erts/emulator/hipe/hipe_arm_abi.txt b/erts/emulator/hipe/hipe_arm_abi.txt new file mode 100644 index 0000000000..6868704d62 --- /dev/null +++ b/erts/emulator/hipe/hipe_arm_abi.txt @@ -0,0 +1,95 @@ + + %CopyrightBegin% + %CopyrightEnd% + +$Id$ + +HiPE ARM ABI +================ +This document describes aspects of HiPE's runtime system +that are specific to the ARM architecture. + +Register Usage +-------------- +r13 is reserved for the C runtime system. +XXX: r10 should be reserved too if stack checking is enabled + +r9-r11 and r15 are fixed (unallocatable). +r9 (HP) is the current process' heap pointer. +r10 (NSP) is the current process' native stack pointer. +r11 (P) is the current process' "Process" pointer. +r15 (pc) is the program counter. + +r0-r8, r12, and r14 (lr) are caller-save. They are used as temporary +scratch registers and for function call parameters and results. + +The runtime system uses temporaries in specific contexts: +r8 (TEMP_LR) is used to preserve lr around BIF calls, +and to pass the callee address in native-to-BEAM traps. +r7 (TEMP_ARG0) is used to preserve the return value in nbif_stack_trap_ra, +and lr in hipe_arm_inc_stack (the caller saved its lr in TEMP_LR). +r1 (ARG0) is used for MBUF-after-BIF checks, for storing the +arity of a BIF that throws an exception or does GC due to MBUF, +and for checking P->flags for pending timeout. +r0 is used to inspect the type of a thrown exception, return a +result token from glue.S back to hipe_mode_switch(), and to pass +the callee arity in native-to-BEAM traps.
+ +Calling Convention +------------------ +The first NR_ARG_REGS parameters (a tunable parameter between 0 and 6, +inclusive) are passed in r1-r6. + +r0 is not used for parameter passing. This allows the BIF wrappers to +simply move P to r0 without shifting the remaining parameter registers. + +r12 is not used for parameter passing since it may be modified +during function linkage. + +r14 contains the return address during function calls. + +The return value from a function is placed in r0. + +Notes: +- We could pass more parameters in r7, r8, r0, and r12. However: + * distant call and trap-to-BEAM trampolines may need scratch registers + * using >6 argument registers complicates the mode-switch interface + (needs hacks and special-case optimisations) + * it is questionable whether using more than 6 improves performance; + it may be better to just cache more P state in registers + +Stack Frame Layout +------------------ +[From top to bottom: formals in left-to-right order, incoming return +address, fixed-size chunk for locals & spills, variable-size area +for actuals, outgoing return address. NSP normally points at the +bottom of the fixed-size chunk, except during a recursive call. +The callee pops the actuals, so no NSP adjustment at return.] + +Stack Descriptors +----------------- +sdesc_fsize() is the frame size excluding the return address word. + +Standard Linux ARM Calling Conventions +====================================== + +Reg Status Role +--- ------ ---- +r0-r3 calleR-save Argument/result/scratch registers. +r4-r8 calleE-save Local variables. +r9 calleE-save PIC base if PIC and stack checking are both enabled. + Otherwise a local variable. +r10 calleE-save (sl) Stack limit (fixed) if stack checking is enabled. + PIC base if PIC is enabled and stack checking is not. + Otherwise a local variable. +r11 calleE-save (fp) Local variable or frame pointer. +r12 calleR-save (ip) Scratch register, may be modified during + function linkage. 
+r13 calleE-save (sp) Stack pointer (fixed). Must be 4-byte aligned + at all times. Must be 8-byte aligned during transfers + to/from functions. +r14 calleR-save (lr) Link register or scratch variable. +r15 fixed (pc) Program counter. + +The stack grows from high to low addresses. +Excess parameters are stored on the stack, at SP+0 and up. diff --git a/erts/emulator/hipe/hipe_arm_asm.m4 b/erts/emulator/hipe/hipe_arm_asm.m4 new file mode 100644 index 0000000000..b9a696ffff --- /dev/null +++ b/erts/emulator/hipe/hipe_arm_asm.m4 @@ -0,0 +1,199 @@ +changecom(`/*', `*/')dnl +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * $Id$ + */ +`#ifndef HIPE_ARM_ASM_H +#define HIPE_ARM_ASM_H' + +/* + * Tunables. + */ +define(LEAF_WORDS,16)dnl number of stack words for leaf functions +define(NR_ARG_REGS,3)dnl admissible values are 0 to 6, inclusive + +`#define ARM_LEAF_WORDS 'LEAF_WORDS + +/* + * Reserved registers. + */ +`#define P r11' +`#define NSP r10' +`#define HP r9' +`#define TEMP_LR r8' + +/* + * Context switching macros. + * + * RESTORE_CONTEXT and RESTORE_CONTEXT_QUICK do not affect + * the condition register. 
+ */ +`#define SAVE_CONTEXT_QUICK \ + mov TEMP_LR, lr' + +`#define RESTORE_CONTEXT_QUICK \ + mov lr, TEMP_LR' + +`#define SAVE_CACHED_STATE \ + str HP, [P, #P_HP]; \ + str NSP, [P, #P_NSP]' + +`#define RESTORE_CACHED_STATE \ + ldr HP, [P, #P_HP]; \ + ldr NSP, [P, #P_NSP]' + +`#define SAVE_CONTEXT_BIF \ + mov TEMP_LR, lr; \ + str HP, [P, #P_HP]' + +`#define RESTORE_CONTEXT_BIF \ + ldr HP, [P, #P_HP]' + +`#define SAVE_CONTEXT_GC \ + mov TEMP_LR, lr; \ + str lr, [P, #P_NRA]; \ + str NSP, [P, #P_NSP]; \ + str HP, [P, #P_HP]' + +`#define RESTORE_CONTEXT_GC \ + ldr HP, [P, #P_HP]' + +/* + * Argument (parameter) registers. + */ +`#define ARM_NR_ARG_REGS 'NR_ARG_REGS +`#define NR_ARG_REGS 'NR_ARG_REGS + +define(defarg,`define(ARG$1,`$2')dnl +#`define ARG'$1 $2' +)dnl + +ifelse(eval(NR_ARG_REGS >= 1),0,, +`defarg(0,`r1')')dnl +ifelse(eval(NR_ARG_REGS >= 2),0,, +`defarg(1,`r2')')dnl +ifelse(eval(NR_ARG_REGS >= 3),0,, +`defarg(2,`r3')')dnl +ifelse(eval(NR_ARG_REGS >= 4),0,, +`defarg(3,`r4')')dnl +ifelse(eval(NR_ARG_REGS >= 5),0,, +`defarg(4,`r5')')dnl +ifelse(eval(NR_ARG_REGS >= 6),0,, +`defarg(5,`r6')')dnl + +/* + * TEMP_ARG0: + * Used in nbif_stack_trap_ra to preserve the return value. + * Must be a C callee-save register. + * Must be otherwise unused in the return path. + * + * TEMP_ARG0: + * Used in hipe_arm_inc_stack to preserve the return address + * (TEMP_LR contains the caller's saved return address). + * Must be a C callee-save register. + * Must be otherwise unused in the call path. 
+ */ +`#define TEMP_ARG0 r7' + +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +dnl X X +dnl X hipe_arm_glue.S support X +dnl X X +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + +dnl +dnl LOAD_ARG_REGS +dnl +define(LAR_1,`ldr ARG$1, [P, #P_ARG$1] ; ')dnl +define(LAR_N,`ifelse(eval($1 >= 0),0,,`LAR_N(eval($1-1))LAR_1($1)')')dnl +define(LOAD_ARG_REGS,`LAR_N(eval(NR_ARG_REGS-1))')dnl +`#define LOAD_ARG_REGS 'LOAD_ARG_REGS + +dnl +dnl STORE_ARG_REGS +dnl +define(SAR_1,`str ARG$1, [P, #P_ARG$1] ; ')dnl +define(SAR_N,`ifelse(eval($1 >= 0),0,,`SAR_N(eval($1-1))SAR_1($1)')')dnl +define(STORE_ARG_REGS,`SAR_N(eval(NR_ARG_REGS-1))')dnl +`#define STORE_ARG_REGS 'STORE_ARG_REGS + +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +dnl X X +dnl X hipe_arm_bifs.m4 support X +dnl X X +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + +dnl +dnl NBIF_ARG(DST,ARITY,ARGNO) +dnl Access a formal parameter. +dnl It will be a memory load via NSP when ARGNO >= NR_ARG_REGS. +dnl It will be a register move when 0 <= ARGNO < NR_ARG_REGS; if +dnl the source and destination are the same, the move is suppressed. 
+dnl +define(NBIF_MOVE_REG,`ifelse($1,$2,`# mov $1, $2',`mov $1, $2')')dnl +define(NBIF_REG_ARG,`NBIF_MOVE_REG($1,ARG$2)')dnl +define(NBIF_STK_LOAD,`ldr $1, [NSP, #$2]')dnl +define(NBIF_STK_ARG,`NBIF_STK_LOAD($1,eval(4*(($2-$3)-1)))')dnl +define(NBIF_ARG,`ifelse(eval($3 >= NR_ARG_REGS),0,`NBIF_REG_ARG($1,$3)',`NBIF_STK_ARG($1,$2,$3)')')dnl +`/* #define NBIF_ARG_1_0 'NBIF_ARG(r1,1,0)` */' +`/* #define NBIF_ARG_2_0 'NBIF_ARG(r1,2,0)` */' +`/* #define NBIF_ARG_2_1 'NBIF_ARG(r2,2,1)` */' +`/* #define NBIF_ARG_3_0 'NBIF_ARG(r1,3,0)` */' +`/* #define NBIF_ARG_3_1 'NBIF_ARG(r2,3,1)` */' +`/* #define NBIF_ARG_3_2 'NBIF_ARG(r3,3,2)` */' +`/* #define NBIF_ARG_5_0 'NBIF_ARG(r1,5,0)` */' +`/* #define NBIF_ARG_5_1 'NBIF_ARG(r2,5,1)` */' +`/* #define NBIF_ARG_5_2 'NBIF_ARG(r3,5,2)` */' +`/* #define NBIF_ARG_5_3 'NBIF_ARG(r4,5,3)` */' +`/* #define NBIF_ARG_5_4 'NBIF_ARG(r5,5,4)` */' + +dnl +dnl NBIF_RET(ARITY) +dnl Generates a return from a native BIF, taking care to pop +dnl any stacked formal parameters. +dnl May only be used in BIF/primop wrappers where SAVE_CONTEXT +dnl has saved LR in TEMP_LR. +dnl +define(NSP_RETN,`add NSP, NSP, #$1 + mov pc, TEMP_LR')dnl +define(NSP_RET0,`mov pc, TEMP_LR')dnl +define(RET_POP,`ifelse(eval($1 > NR_ARG_REGS),0,0,eval(4*($1 - NR_ARG_REGS)))')dnl +define(NBIF_RET_N,`ifelse(eval($1),0,`NSP_RET0',`NSP_RETN($1)')')dnl +define(NBIF_RET,`NBIF_RET_N(eval(RET_POP($1)))')dnl +`/* #define NBIF_RET_0 'NBIF_RET(0)` */' +`/* #define NBIF_RET_1 'NBIF_RET(1)` */' +`/* #define NBIF_RET_2 'NBIF_RET(2)` */' +`/* #define NBIF_RET_3 'NBIF_RET(3)` */' +`/* #define NBIF_RET_5 'NBIF_RET(5)` */' + +dnl +dnl QUICK_CALL_RET(CFUN,ARITY) +dnl Used in nocons_nofail and noproc primop interfaces to optimise +dnl SAVE_CONTEXT_QUICK; bl CFUN; RESTORE_CONTEXT_QUICK; NBIF_RET(ARITY). 
+dnl +define(NBIF_POP_N,`ifelse(eval($1),0,`',`add NSP, NSP, #$1 ; ')')dnl +define(QUICK_CALL_RET,`NBIF_POP_N(eval(RET_POP($2)))b $1')dnl +`/* #define QUICK_CALL_RET_F_0 'QUICK_CALL_RET(F,0)` */' +`/* #define QUICK_CALL_RET_F_1 'QUICK_CALL_RET(F,1)` */' +`/* #define QUICK_CALL_RET_F_2 'QUICK_CALL_RET(F,2)` */' +`/* #define QUICK_CALL_RET_F_3 'QUICK_CALL_RET(F,3)` */' +`/* #define QUICK_CALL_RET_F_5 'QUICK_CALL_RET(F,5)` */' + +`#endif /* HIPE_ARM_ASM_H */' diff --git a/erts/emulator/hipe/hipe_arm_bifs.m4 b/erts/emulator/hipe/hipe_arm_bifs.m4 new file mode 100644 index 0000000000..4d8636e711 --- /dev/null +++ b/erts/emulator/hipe/hipe_arm_bifs.m4 @@ -0,0 +1,549 @@ +changecom(`/*', `*/')dnl +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * $Id$ + */ + +include(`hipe/hipe_arm_asm.m4') +#`include' "hipe_literals.h" + + .text + .p2align 2 + +`#define JOIN3(A,B,C) A##B##C +#define TEST_GOT_MBUF(ARITY) ldr r1, [P, #P_MBUF]; cmp r1, #0; blne JOIN3(nbif_,ARITY,_gc_after_bif)' + +/* + * standard_bif_interface_1(nbif_name, cbif_name) + * standard_bif_interface_2(nbif_name, cbif_name) + * standard_bif_interface_3(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 1-3 parameters and + * standard failure mode. 
+ */ +define(standard_bif_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl $2 + TEST_GOT_MBUF(1) + + /* Restore registers. Check for exception. */ + cmp r0, #THE_NON_VALUE + RESTORE_CONTEXT_BIF + beq nbif_1_simple_exception + NBIF_RET(1) + .size $1, .-$1 + .type $1, %function +#endif') + +define(standard_bif_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,2,0) + NBIF_ARG(r2,2,1) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl $2 + TEST_GOT_MBUF(2) + + /* Restore registers. Check for exception. */ + cmp r0, #THE_NON_VALUE + RESTORE_CONTEXT_BIF + beq nbif_2_simple_exception + NBIF_RET(2) + .size $1, .-$1 + .type $1, %function +#endif') + +define(standard_bif_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,3,0) + NBIF_ARG(r2,3,1) + NBIF_ARG(r3,3,2) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl $2 + TEST_GOT_MBUF(3) + + /* Restore registers. Check for exception. */ + cmp r0, #THE_NON_VALUE + RESTORE_CONTEXT_BIF + beq nbif_3_simple_exception + NBIF_RET(3) + .size $1, .-$1 + .type $1, %function +#endif') + +/* + * fail_bif_interface_0(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 0 parameters and + * standard failure mode. + */ +define(fail_bif_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl $2 + TEST_GOT_MBUF(0) + + /* Restore registers. Check for exception. 
*/ + cmp r0, #THE_NON_VALUE + RESTORE_CONTEXT_BIF + beq nbif_0_simple_exception + NBIF_RET(0) + .size $1, .-$1 + .type $1, %function +#endif') + +/* + * gc_bif_interface_0(nbif_name, cbif_name) + * gc_bif_interface_1(nbif_name, cbif_name) + * gc_bif_interface_2(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 0-2 parameters and + * standard failure mode. + * The BIF may do a GC. + */ +define(gc_bif_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + bl $2 + TEST_GOT_MBUF(0) + + /* Restore registers. */ + RESTORE_CONTEXT_GC + NBIF_RET(0) + .size $1, .-$1 + .type $1, %function +#endif') + +define(gc_bif_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + bl $2 + TEST_GOT_MBUF(1) + + /* Restore registers. Check for exception. */ + cmp r0, #THE_NON_VALUE + RESTORE_CONTEXT_GC + beq nbif_1_simple_exception + NBIF_RET(1) + .size $1, .-$1 + .type $1, %function +#endif') + +define(gc_bif_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,2,0) + NBIF_ARG(r2,2,1) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + bl $2 + TEST_GOT_MBUF(2) + + /* Restore registers. Check for exception. */ + cmp r0, #THE_NON_VALUE + RESTORE_CONTEXT_GC + beq nbif_2_simple_exception + NBIF_RET(2) + .size $1, .-$1 + .type $1, %function +#endif') + +/* + * gc_nofail_primop_interface_1(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 1 ordinary parameter and no failure mode. + * The primop may do a GC. 
+ */ +define(gc_nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + bl $2 + + /* Restore registers. */ + RESTORE_CONTEXT_GC + NBIF_RET(1) + .size $1, .-$1 + .type $1, %function +#endif') + +/* + * nofail_primop_interface_0(nbif_name, cbif_name) + * nofail_primop_interface_1(nbif_name, cbif_name) + * nofail_primop_interface_2(nbif_name, cbif_name) + * nofail_primop_interface_3(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 0-3 ordinary parameters and no failure mode. + * Also used for guard BIFs. + */ +define(nofail_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl $2 + TEST_GOT_MBUF(0) + + /* Restore registers. */ + RESTORE_CONTEXT_BIF + NBIF_RET(0) + .size $1, .-$1 + .type $1, %function +#endif') + +define(nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl $2 + TEST_GOT_MBUF(1) + + /* Restore registers. */ + RESTORE_CONTEXT_BIF + NBIF_RET(1) + .size $1, .-$1 + .type $1, %function +#endif') + +define(nofail_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,2,0) + NBIF_ARG(r2,2,1) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl $2 + TEST_GOT_MBUF(2) + + /* Restore registers. 
*/ + RESTORE_CONTEXT_BIF + NBIF_RET(2) + .size $1, .-$1 + .type $1, %function +#endif') + +define(nofail_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,3,0) + NBIF_ARG(r2,3,1) + NBIF_ARG(r3,3,2) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl $2 + TEST_GOT_MBUF(3) + + /* Restore registers. */ + RESTORE_CONTEXT_BIF + NBIF_RET(3) + .size $1, .-$1 + .type $1, %function +#endif') + +/* + * nocons_nofail_primop_interface_0(nbif_name, cbif_name) + * nocons_nofail_primop_interface_1(nbif_name, cbif_name) + * nocons_nofail_primop_interface_2(nbif_name, cbif_name) + * nocons_nofail_primop_interface_3(nbif_name, cbif_name) + * nocons_nofail_primop_interface_5(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 0-3 or 5 ordinary parameters, and no failure mode. + * The primop cannot CONS or gc. + */ +define(nocons_nofail_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,0) + .size $1, .-$1 + .type $1, %function +#endif') + +define(nocons_nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,1,0) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,1) + .size $1, .-$1 + .type $1, %function +#endif') + +define(nocons_nofail_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,2,0) + NBIF_ARG(r2,2,1) + + /* Perform a quick save;call;restore;ret sequence. 
*/ + QUICK_CALL_RET($2,2) + .size $1, .-$1 + .type $1, %function +#endif') + +define(nocons_nofail_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,3,0) + NBIF_ARG(r2,3,1) + NBIF_ARG(r3,3,2) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,3) + .size $1, .-$1 + .type $1, %function +#endif') + +define(nocons_nofail_primop_interface_5, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument stack. */ + NBIF_ARG(r0,5,3) + str r0, [sp, #0] + NBIF_ARG(r0,5,4) + str r0, [sp, #4] + + /* Set up C argument registers. */ + mov r0, P + NBIF_ARG(r1,5,0) + NBIF_ARG(r2,5,1) + NBIF_ARG(r3,5,2) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,5) + .size $1, .-$1 + .type $1, %function +#endif') + +/* + * noproc_primop_interface_0(nbif_name, cbif_name) + * noproc_primop_interface_1(nbif_name, cbif_name) + * noproc_primop_interface_2(nbif_name, cbif_name) + * noproc_primop_interface_3(nbif_name, cbif_name) + * noproc_primop_interface_5(nbif_name, cbif_name) + * + * Generate native interface for a primop with no implicit P + * parameter, 0-3 or 5 ordinary parameters, and no failure mode. + * The primop cannot CONS or gc. + */ +define(noproc_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* XXX: this case is always trivial; how to suppress the branch? */ + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,0) + .size $1, .-$1 + .type $1, %function +#endif') + +define(noproc_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + NBIF_ARG(r0,1,0) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,1) + .size $1, .-$1 + .type $1, %function +#endif') + +define(noproc_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. 
+ */ + NBIF_ARG(r0,2,0) + NBIF_ARG(r1,2,1) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,2) + .size $1, .-$1 + .type $1, %function +#endif') + +define(noproc_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + NBIF_ARG(r0,3,0) + NBIF_ARG(r1,3,1) + NBIF_ARG(r2,3,2) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,3) + .size $1, .-$1 + .type $1, %function +#endif') + +define(noproc_primop_interface_5, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers r0-r3. The fifth argument is staged in r4 (used as scratch) and stored in the C stack slot at [sp, #0]. */ + NBIF_ARG(r0,5,0) + NBIF_ARG(r1,5,1) + NBIF_ARG(r2,5,2) + NBIF_ARG(r3,5,3) + NBIF_ARG(r4,5,4) + str r4, [sp, #0] + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,5) + .size $1, .-$1 + .type $1, %function +#endif') + +include(`hipe/hipe_bif_list.m4') + +`#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif' diff --git a/erts/emulator/hipe/hipe_arm_gc.h b/erts/emulator/hipe/hipe_arm_gc.h new file mode 100644 index 0000000000..a2a919e3d7 --- /dev/null +++ b/erts/emulator/hipe/hipe_arm_gc.h @@ -0,0 +1,29 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * Stack walking helpers for native stack GC procedures. + * ARM version. 
+ */ +#ifndef HIPE_ARM_GC_H +#define HIPE_ARM_GC_H + +#include "hipe_arm_asm.h" /* for NR_ARG_REGS */ +#include "hipe_risc_gc.h" + +#endif /* HIPE_ARM_GC_H */ diff --git a/erts/emulator/hipe/hipe_arm_glue.S b/erts/emulator/hipe/hipe_arm_glue.S new file mode 100644 index 0000000000..5d626a5f69 --- /dev/null +++ b/erts/emulator/hipe/hipe_arm_glue.S @@ -0,0 +1,417 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#include "hipe_arm_asm.h" +#include "hipe_literals.h" +#define ASM +#include "hipe_mode_switch.h" + + .text + .p2align 2 + +/* + * Enter Erlang from C. + * Create a new frame on the C stack. + * Save C callee-save registers in the frame. + * Do not clobber the C argument registers. + * Retrieve the process pointer from the C argument registers. + * + * Our C frame includes: + * - 9*4 == 36 bytes for saving r4-r11 and lr + * - 2*4 == 8 bytes for calls to hipe_bs_put_{big_integer,small_float}. + * They take 5-6 parameter words: 4 in registers and 1-2 on the stack. + * (They take 5 regular parameters, and an additional P parameter on SMP.) + * - 4 bytes to pad the frame size to a multiple of 8 + */ +#define ENTER_FROM_C \ + stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}; \ + sub sp, sp, #12; \ + mov P, r0; \ + RESTORE_CACHED_STATE + +/* + * Return to the calling C function. + * The return value is in r0. 
+ * + * .nosave_exit saves no state + * .flush_exit saves NSP and other cached P state. + * .suspend_exit also saves RA. + */ +.suspend_exit: + /* save RA, so we can be resumed */ + str lr, [P, #P_NRA] +.flush_exit: + /* flush cached P state */ + SAVE_CACHED_STATE +.nosave_exit: + /* restore callee-save registers, drop frame, return */ + add sp, sp, #12 + ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc} + +/* + * int hipe_arm_call_to_native(Process *p); + * Emulated code recursively calls native code. + */ + .global hipe_arm_call_to_native +hipe_arm_call_to_native: + ENTER_FROM_C + /* get argument registers */ + LOAD_ARG_REGS + /* call the target */ + mov lr, pc + ldr pc, [P, #P_NCALLEE] +/* FALLTHROUGH + * + * We export this return address so that hipe_mode_switch() can discover + * when native code tailcalls emulated code. + * + * This is where native code returns to emulated code. + */ + .global nbif_return +nbif_return: + str r0, [P, #P_ARG0] /* save retval */ + mov r0, #HIPE_MODE_SWITCH_RES_RETURN + b .flush_exit + +/* + * int hipe_arm_return_to_native(Process *p); + * Emulated code returns to its native code caller. + */ + .global hipe_arm_return_to_native +hipe_arm_return_to_native: + ENTER_FROM_C + /* get return value */ + ldr r0, [P, #P_ARG0] + /* + * Return using the current return address. + * The parameters were popped at the original native-to-emulated + * call (hipe_call_from_native_is_recursive), so a plain ret suffices. + */ + ldr pc, [P, #P_NRA] + +/* + * int hipe_arm_tailcall_to_native(Process *p); + * Emulated code tailcalls native code. + */ + .global hipe_arm_tailcall_to_native +hipe_arm_tailcall_to_native: + ENTER_FROM_C + /* get argument registers */ + LOAD_ARG_REGS + /* restore return address */ + ldr lr, [P, #P_NRA] + /* call the target */ + ldr pc, [P, #P_NCALLEE] + +/* + * int hipe_arm_throw_to_native(Process *p); + * Emulated code throws an exception to its native code caller. 
+ */ + .global hipe_arm_throw_to_native +hipe_arm_throw_to_native: + ENTER_FROM_C + /* invoke the handler */ + ldr pc, [P, #P_NCALLEE] /* set by hipe_find_handler() */ + +/* + * Native code calls emulated code via a stub + * which should look as follows: + * + * stub for f/N: + * <set r8 to f's BEAM code address> + * <set r0 to N> + * b nbif_callemu + * + * XXX: Different stubs for different number of register parameters? + */ + .global nbif_callemu +nbif_callemu: + str r8, [P, #P_BEAM_IP] + str r0, [P, #P_ARITY] + STORE_ARG_REGS + mov r0, #HIPE_MODE_SWITCH_RES_CALL + b .suspend_exit + +/* + * nbif_apply + */ + .global nbif_apply +nbif_apply: + STORE_ARG_REGS + mov r0, #HIPE_MODE_SWITCH_RES_APPLY + b .suspend_exit + +/* + * Native code calls an emulated-mode closure via a stub defined below. + * + * The closure is appended as the last actual parameter, and parameters + * beyond the first few passed in registers are pushed onto the stack in + * left-to-right order. + * Hence, the location of the closure parameter only depends on the number + * of parameters in registers, not the total number of parameters. 
+ */ +#if NR_ARG_REGS >= 6 + .global nbif_ccallemu6 +nbif_ccallemu6: + str ARG5, [P, #P_ARG5] +#if NR_ARG_REGS > 6 + mov ARG5, ARG6 +#else + ldr ARG5, [NSP, #0] +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 5 + .global nbif_ccallemu5 +nbif_ccallemu5: + str ARG4, [P, #P_ARG4] +#if NR_ARG_REGS > 5 + mov ARG4, ARG5 +#else + ldr ARG4, [NSP, #0] +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 4 + .global nbif_ccallemu4 +nbif_ccallemu4: + str ARG3, [P, #P_ARG3] +#if NR_ARG_REGS > 4 + mov ARG3, ARG4 +#else + ldr ARG3, [NSP, #0] +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 3 + .global nbif_ccallemu3 +nbif_ccallemu3: + str ARG2, [P, #P_ARG2] +#if NR_ARG_REGS > 3 + mov ARG2, ARG3 +#else + ldr ARG2, [NSP, #0] +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 2 + .global nbif_ccallemu2 +nbif_ccallemu2: + str ARG1, [P, #P_ARG1] +#if NR_ARG_REGS > 2 + mov ARG1, ARG2 +#else + ldr ARG1, [NSP, #0] +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 1 + .global nbif_ccallemu1 +nbif_ccallemu1: + str ARG0, [P, #P_ARG0] +#if NR_ARG_REGS > 1 + mov ARG0, ARG1 +#else + ldr ARG0, [NSP, #0] +#endif + /*FALLTHROUGH*/ +#endif + + .global nbif_ccallemu0 +nbif_ccallemu0: + /* We use r1 not ARG0 here because ARG0 is not + defined when NR_ARG_REGS == 0. */ +#if NR_ARG_REGS == 0 + ldr r1, [NSP, #0] /* get the closure */ +#endif + str r1, [P, #P_CLOSURE] /* save the closure */ + mov r0, #HIPE_MODE_SWITCH_RES_CALL_CLOSURE + b .suspend_exit + +/* + * This is where native code suspends. 
+ */ + .global nbif_suspend_0 +nbif_suspend_0: + mov r0, #HIPE_MODE_SWITCH_RES_SUSPEND + b .suspend_exit + +/* + * Suspend from a receive (waiting for a message) + */ + .global nbif_suspend_msg +nbif_suspend_msg: + mov r0, #HIPE_MODE_SWITCH_RES_WAIT + b .suspend_exit + +/* + * Suspend from a receive with a timeout (waiting for a message) + * if (!(p->flags & F_TIMO)) { suspend } + * else { return 0; } + */ + .global nbif_suspend_msg_timeout +nbif_suspend_msg_timeout: + ldr r1, [P, #P_FLAGS] + mov r0, #HIPE_MODE_SWITCH_RES_WAIT_TIMEOUT + /* this relies on F_TIMO (1<<2) being encodable as an ARM data-processing immediate */ + tst r1, #F_TIMO + beq .suspend_exit + /* timeout has occurred */ + mov r0, #0 + mov pc, lr + +/* + * This is the default exception handler for native code. + */ + .global nbif_fail +nbif_fail: + mov r0, #HIPE_MODE_SWITCH_RES_THROW + b .flush_exit /* no need to save RA */ + +/* + * nbif_N_gc_after_bif: each entry point loads the arity N into r1. + * The common tail records the arity, TEMP_LR and NSP in P, then calls + * erts_gc_after_bif_call(P, r0) where r0 is the value held on entry + * (presumably the BIF result; confirm against the callers). + * Afterwards TEMP_LR is reloaded from P, P_NARITY is cleared, and + * control returns to the address that was in lr on entry. + */ + .global nbif_0_gc_after_bif + .global nbif_1_gc_after_bif + .global nbif_2_gc_after_bif + .global nbif_3_gc_after_bif +nbif_0_gc_after_bif: + mov r1, #0 + b .gc_after_bif +nbif_1_gc_after_bif: + mov r1, #1 + b .gc_after_bif +nbif_2_gc_after_bif: + mov r1, #2 + b .gc_after_bif +nbif_3_gc_after_bif: + mov r1, #3 + /*FALLTHROUGH*/ +.gc_after_bif: + str r1, [P, #P_NARITY] + str TEMP_LR, [P, #P_NRA] + str NSP, [P, #P_NSP] + mov TEMP_LR, lr + mov r1, r0 + mov r0, P + bl erts_gc_after_bif_call + mov lr, TEMP_LR + ldr TEMP_LR, [P, #P_NRA] + mov r1, #0 + str r1, [P, #P_NARITY] + mov pc, lr + +/* + * We end up here when a BIF called from native signals an + * exceptional condition. + * HP was just read from P. + * NSP has not been saved in P. 
+ * TEMP_LR contains a copy of LR + */ + .global nbif_0_simple_exception +nbif_0_simple_exception: + mov r1, #0 + b .nbif_simple_exception + .global nbif_1_simple_exception +nbif_1_simple_exception: + mov r1, #1 + b .nbif_simple_exception + .global nbif_2_simple_exception +nbif_2_simple_exception: + mov r1, #2 + b .nbif_simple_exception + .global nbif_3_simple_exception +nbif_3_simple_exception: + mov r1, #3 + /*FALLTHROUGH*/ +.nbif_simple_exception: + ldr r0, [P, #P_FREASON] + cmp r0, #FREASON_TRAP + beq .handle_trap + /* + * Find and invoke catch handler (it must exist). + * HP was just read from P. + * NSP has not been saved in P. + * TEMP_LR should contain the current call's return address. + * r1 should contain the current call's arity. + */ + str NSP, [P, #P_NSP] + str TEMP_LR, [P, #P_NRA] + str r1, [P, #P_NARITY] + /* find and prepare to invoke the handler */ + mov r0, P + bl hipe_handle_exception /* Note: hipe_handle_exception() conses */ + RESTORE_CACHED_STATE /* NSP updated by hipe_find_handler() */ + /* now invoke the handler */ + ldr pc, [P, #P_NCALLEE] /* set by hipe_find_handler() */ + + /* + * A BIF failed with freason TRAP: + * - the BIF's arity is in r1 + * - the native RA was saved in TEMP_LR before the BIF call + * - HP was just read from P + * - NSP has not been saved in P + */ +.handle_trap: + mov r0, #HIPE_MODE_SWITCH_RES_TRAP + str NSP, [P, #P_NSP] + str r1, [P, #P_NARITY] + str TEMP_LR, [P, #P_NRA] + b .nosave_exit + +/* + * nbif_stack_trap_ra: trap return address for maintaining + * the gray/white stack boundary + */ + .global nbif_stack_trap_ra +nbif_stack_trap_ra: /* a return address, not a function */ + # This only handles a single return value. + # If we have more, we need to save them in the PCB. 
+ mov TEMP_ARG0, r0 /* save retval */ + str NSP, [P, #P_NSP] + mov r0, P + bl hipe_handle_stack_trap /* must not cons */ + mov lr, r0 /* original RA */ + mov r0, TEMP_ARG0 /* restore retval */ + mov pc, lr /* resume at original RA */ + +/* + * hipe_arm_inc_stack + * Caller saved its LR in TEMP_LR (== TEMP1) before calling us. + */ + .global hipe_arm_inc_stack +hipe_arm_inc_stack: + STORE_ARG_REGS + mov TEMP_ARG0, lr + str NSP, [P, #P_NSP] + mov r0, P + # hipe_inc_nstack reads and writes NSP and NSP_LIMIT, + # but does not access LR/RA, HP, or FCALLS. + bl hipe_inc_nstack + ldr NSP, [P, #P_NSP] + LOAD_ARG_REGS + # this relies on LOAD_ARG_REGS not clobbering TEMP_ARG0 + mov pc, TEMP_ARG0 + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/erts/emulator/hipe/hipe_arm_glue.h b/erts/emulator/hipe/hipe_arm_glue.h new file mode 100644 index 0000000000..e840c3dc0f --- /dev/null +++ b/erts/emulator/hipe/hipe_arm_glue.h @@ -0,0 +1,32 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_ARM_GLUE_H +#define HIPE_ARM_GLUE_H + +#include "hipe_arm_asm.h" /* for NR_ARG_REGS, ARM_LEAF_WORDS */ +#define NR_LEAF_WORDS ARM_LEAF_WORDS +#define HIPE_ARCH_CALL_TO_NATIVE hipe_arm_call_to_native +#define HIPE_ARCH_RETURN_TO_NATIVE hipe_arm_return_to_native +#define HIPE_ARCH_TAILCALL_TO_NATIVE hipe_arm_tailcall_to_native +#define HIPE_ARCH_THROW_TO_NATIVE hipe_arm_throw_to_native +#include "hipe_risc_glue.h" + +#endif /* HIPE_ARM_GLUE_H */ diff --git a/erts/emulator/hipe/hipe_arm_primops.h b/erts/emulator/hipe/hipe_arm_primops.h new file mode 100644 index 0000000000..a28b509eee --- /dev/null +++ b/erts/emulator/hipe/hipe_arm_primops.h @@ -0,0 +1,21 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +PRIMOP_LIST(am_inc_stack_0, &hipe_arm_inc_stack) diff --git a/erts/emulator/hipe/hipe_bif0.c b/erts/emulator/hipe/hipe_bif0.c new file mode 100644 index 0000000000..032bf2e896 --- /dev/null +++ b/erts/emulator/hipe/hipe_bif0.c @@ -0,0 +1,1945 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. 
You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_bif0.c + * + * Compiler and linker support. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "sys.h" +#include "error.h" +#include "erl_vm.h" +#include "global.h" +#include "erl_process.h" +#include "bif.h" +#include "big.h" +#include "beam_load.h" +#include "erl_db.h" +#include "hash.h" +#include "erl_bits.h" +#include "erl_binary.h" +#ifdef HIPE +#include <stddef.h> /* offsetof() */ +#include "hipe_arch.h" +#include "hipe_stack.h" +#include "hipe_mode_switch.h" +#include "hipe_native_bif.h" +#include "hipe_bif0.h" +/* We need hipe_literals.h for HIPE_SYSTEM_CRC, but it redefines + a few constants. #undef them here to avoid warnings. */ +#undef F_TIMO +#undef THE_NON_VALUE +#undef ERL_FUN_SIZE +#include "hipe_literals.h" +#endif + +#define BeamOpCode(Op) ((Uint)BeamOp(Op)) + +int term_to_Sint32(Eterm term, Sint *sp) +{ + Sint val; + + if (!term_to_Sint(term, &val)) + return 0; + if ((Sint)(Sint32)val != val) + return 0; + *sp = val; + return 1; +} + +static Eterm Uint_to_term(Uint x, Process *p) +{ + if (IS_USMALL(0, x)) { + return make_small(x); + } else { + Eterm *hp = HAlloc(p, BIG_UINT_HEAP_SIZE); + return uint_to_big(x, hp); + } +} + +void *term_to_address(Eterm arg) +{ + Uint u; + return term_to_Uint(arg, &u) ? (void*)u : NULL; +} + +static Eterm address_to_term(const void *address, Process *p) +{ + return Uint_to_term((Uint)address, p); +} + +/* + * BIFs for reading and writing memory. Used internally by HiPE. 
+ */ +#if 0 /* XXX: unused */ +BIF_RETTYPE hipe_bifs_read_u8_1(BIF_ALIST_1) +{ + unsigned char *address = term_to_address(BIF_ARG_1); + if (!address) + BIF_ERROR(BIF_P, BADARG); + BIF_RET(make_small(*address)); +} +#endif + +#if 0 /* XXX: unused */ +BIF_RETTYPE hipe_bifs_read_u32_1(BIF_ALIST_1) +{ + Uint32 *address = term_to_address(BIF_ARG_1); + if (!address || !hipe_word32_address_ok(address)) + BIF_ERROR(BIF_P, BADARG); + BIF_RET(Uint_to_term(*address, BIF_P)); +} +#endif + +BIF_RETTYPE hipe_bifs_write_u8_2(BIF_ALIST_2) +{ + unsigned char *address; + + address = term_to_address(BIF_ARG_1); + if (!address || is_not_small(BIF_ARG_2)) + BIF_ERROR(BIF_P, BADARG); + *address = unsigned_val(BIF_ARG_2); + BIF_RET(NIL); +} + +#if 0 /* XXX: unused */ +BIF_RETTYPE hipe_bifs_write_s32_2(BIF_ALIST_2) +{ + Sint32 *address; + Sint value; + + address = term_to_address(BIF_ARG_1); + if (!address || !hipe_word32_address_ok(address)) + BIF_ERROR(BIF_P, BADARG); + if (!term_to_Sint32(BIF_ARG_2, &value)) + BIF_ERROR(BIF_P, BADARG); + *address = value; + BIF_RET(NIL); +} +#endif + +BIF_RETTYPE hipe_bifs_write_u32_2(BIF_ALIST_2) +{ + Uint32 *address; + Uint value; + + address = term_to_address(BIF_ARG_1); + if (!address || !hipe_word32_address_ok(address)) + BIF_ERROR(BIF_P, BADARG); + if (!term_to_Uint(BIF_ARG_2, &value)) + BIF_ERROR(BIF_P, BADARG); + if ((Uint)(Uint32)value != value) + BIF_ERROR(BIF_P, BADARG); + *address = value; + hipe_flush_icache_word(address); + BIF_RET(NIL); +} + +/* + * BIFs for mutable bytearrays. 
+ */ +BIF_RETTYPE hipe_bifs_bytearray_2(BIF_ALIST_2) +{ + Sint nelts; + Eterm bin; + + if (is_not_small(BIF_ARG_1) || + (nelts = signed_val(BIF_ARG_1)) < 0 || + !is_byte(BIF_ARG_2)) + BIF_ERROR(BIF_P, BADARG); + bin = new_binary(BIF_P, NULL, nelts); + memset(binary_bytes(bin), unsigned_val(BIF_ARG_2), nelts); + BIF_RET(bin); +} + +static inline unsigned char *bytearray_lvalue(Eterm bin, Eterm idx) +{ + Sint i; + unsigned char *bytes; + Uint bitoffs; + Uint bitsize; + + if (is_not_binary(bin) || + is_not_small(idx) || + (i = unsigned_val(idx)) >= binary_size(bin)) + return NULL; + ERTS_GET_BINARY_BYTES(bin, bytes, bitoffs, bitsize); + ASSERT(bitoffs == 0); + ASSERT(bitsize == 0); + return bytes + i; +} + +BIF_RETTYPE hipe_bifs_bytearray_sub_2(BIF_ALIST_2) +{ + unsigned char *bytep; + + bytep = bytearray_lvalue(BIF_ARG_1, BIF_ARG_2); + if (!bytep) + BIF_ERROR(BIF_P, BADARG); + BIF_RET(make_small(*bytep)); +} + +BIF_RETTYPE hipe_bifs_bytearray_update_3(BIF_ALIST_3) +{ + unsigned char *bytep; + + bytep = bytearray_lvalue(BIF_ARG_1, BIF_ARG_2); + if (!bytep || !is_byte(BIF_ARG_3)) + BIF_ERROR(BIF_P, BADARG); + *bytep = unsigned_val(BIF_ARG_3); + BIF_RET(BIF_ARG_1); +} + +BIF_RETTYPE hipe_bifs_bitarray_2(BIF_ALIST_2) +{ + Sint nbits; + Uint nbytes; + Eterm bin; + int bytemask; + + if (is_not_small(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + nbits = signed_val(BIF_ARG_1); + if (nbits < 0) + BIF_ERROR(BIF_P, BADARG); + if (BIF_ARG_2 == am_false) + bytemask = 0; + else if (BIF_ARG_2 == am_true) + bytemask = ~0; + else + BIF_ERROR(BIF_P, BADARG); + nbytes = ((Uint)nbits + ((1 << 3) - 1)) >> 3; + bin = new_binary(BIF_P, NULL, nbytes); + memset(binary_bytes(bin), bytemask, nbytes); + BIF_RET(bin); +} + +BIF_RETTYPE hipe_bifs_bitarray_update_3(BIF_ALIST_3) +{ + unsigned char *bytes, bytemask; + Uint bitoffs, bitsize; + Uint bitnr, bytenr; + int set; + + if (is_not_binary(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + if (is_not_small(BIF_ARG_2)) + BIF_ERROR(BIF_P, BADARG); + bitnr = 
unsigned_val(BIF_ARG_2); + bytenr = bitnr >> 3; + if (bytenr >= binary_size(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + if (BIF_ARG_3 == am_false) + set = 0; + else if (BIF_ARG_3 == am_true) + set = 1; + else + BIF_ERROR(BIF_P, BADARG); + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + ASSERT(bitoffs == 0); + ASSERT(bitsize == 0); + bytemask = 1 << (bitnr & ((1 << 3) - 1)); + if (set) + bytes[bytenr] |= bytemask; + else + bytes[bytenr] &= ~bytemask; + BIF_RET(BIF_ARG_1); +} + +BIF_RETTYPE hipe_bifs_bitarray_sub_2(BIF_ALIST_2) +{ + unsigned char *bytes, bytemask; + Uint bitoffs, bitsize; + Uint bitnr, bytenr; + + if (is_not_binary(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + if (is_not_small(BIF_ARG_2)) + BIF_ERROR(BIF_P, BADARG); + bitnr = unsigned_val(BIF_ARG_2); + bytenr = bitnr >> 3; + if (bytenr >= binary_size(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + ASSERT(bitoffs == 0); + ASSERT(bitsize == 0); + bytemask = 1 << (bitnr & ((1 << 3) - 1)); + if ((bytes[bytenr] & bytemask) == 0) + BIF_RET(am_false); + else + BIF_RET(am_true); +} + +/* + * BIFs for SML-like mutable arrays and reference cells. + * For now, limited to containing immediate data. 
+ */ +#if 1 /* use bignums as carriers, easier on the gc */ +#define make_array_header(sz) make_pos_bignum_header((sz)) +#define array_header_arity(h) header_arity((h)) +#define make_array(hp) make_big((hp)) +#define is_not_array(x) is_not_big((x)) +#define array_val(x) big_val((x)) +#else /* use tuples as carriers, easier debugging, harder on the gc */ +#define make_array_header(sz) make_arityval((sz)) +#define array_header_arity(h) arityval((h)) +#define make_array(hp) make_tuple((hp)) +#define is_not_array(x) is_not_tuple((x)) +#define array_val(x) tuple_val((x)) +#endif +#define array_length(a) array_header_arity(array_val((a))[0]) + +BIF_RETTYPE hipe_bifs_array_2(BIF_ALIST_2) +{ + Eterm *hp; + Sint nelts, i; + + if (is_not_small(BIF_ARG_1) || + (nelts = signed_val(BIF_ARG_1)) < 0 || + is_not_immed(BIF_ARG_2)) + BIF_ERROR(BIF_P, BADARG); + if (nelts == 0) /* bignums must not be empty */ + BIF_RET(make_small(0)); + hp = HAlloc(BIF_P, 1+nelts); + hp[0] = make_array_header(nelts); + for (i = 1; i <= nelts; ++i) + hp[i] = BIF_ARG_2; + BIF_RET(make_array(hp)); +} + +BIF_RETTYPE hipe_bifs_array_length_1(BIF_ALIST_1) +{ + if (is_not_array(BIF_ARG_1)) { + if (BIF_ARG_1 == make_small(0)) /* fixnum 0 represents empty arrays */ + BIF_RET(make_small(0)); + BIF_ERROR(BIF_P, BADARG); + } + BIF_RET(make_small(array_header_arity(array_val(BIF_ARG_1)[0]))); +} + +BIF_RETTYPE hipe_bifs_array_sub_2(BIF_ALIST_2) +{ + Uint i; + + if (is_not_small(BIF_ARG_2) || + is_not_array(BIF_ARG_1) || + (i = unsigned_val(BIF_ARG_2)) >= array_length(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + BIF_RET(array_val(BIF_ARG_1)[i+1]); +} + +BIF_RETTYPE hipe_bifs_array_update_3(BIF_ALIST_3) +{ + Uint i; + + if (is_not_immed(BIF_ARG_3) || + is_not_small(BIF_ARG_2) || + is_not_array(BIF_ARG_1) || + (i = unsigned_val(BIF_ARG_2)) >= array_length(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + array_val(BIF_ARG_1)[i+1] = BIF_ARG_3; + BIF_RET(BIF_ARG_1); +} + +BIF_RETTYPE hipe_bifs_ref_1(BIF_ALIST_1) +{ + Eterm *hp; 
+ + /* + * A cell may only hold an immediate term. Raise badarg here, as the + * other ref and array BIFs do, instead of returning BADARG as a value. + */ + if (is_not_immed(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + hp = HAlloc(BIF_P, 1+1); + hp[0] = make_array_header(1); + hp[1] = BIF_ARG_1; + BIF_RET(make_array(hp)); +} + +/* + * Return the contents of a reference cell. + * Fails with badarg unless the argument is an array whose header + * equals make_array_header(1). + */ +BIF_RETTYPE hipe_bifs_ref_get_1(BIF_ALIST_1) +{ + if (is_not_array(BIF_ARG_1) || + array_val(BIF_ARG_1)[0] != make_array_header(1)) + BIF_ERROR(BIF_P, BADARG); + BIF_RET(array_val(BIF_ARG_1)[1]); +} + +/* + * Destructively store the immediate term BIF_ARG_2 in the reference + * cell BIF_ARG_1 and return the cell. + * Fails with badarg if the value is not an immediate or the cell is + * not an array whose header equals make_array_header(1). + */ +BIF_RETTYPE hipe_bifs_ref_set_2(BIF_ALIST_2) +{ + if (is_not_immed(BIF_ARG_2) || + is_not_array(BIF_ARG_1) || + array_val(BIF_ARG_1)[0] != make_array_header(1)) + BIF_ERROR(BIF_P, BADARG); + array_val(BIF_ARG_1)[1] = BIF_ARG_2; + BIF_RET(BIF_ARG_1); +} + +/* + * Allocate memory and copy machine code to it. + * + * BIF_ARG_1 is a binary holding the code bytes. BIF_ARG_2 is handed to + * HIPE_ALLOC_CODE when that macro is defined, and must be NIL otherwise. + * Returns the 2-tuple {Address, Trampolines}; Trampolines stays NIL + * unless HIPE_ALLOC_CODE fills it in. + */ +BIF_RETTYPE hipe_bifs_enter_code_2(BIF_ALIST_2) +{ + Uint nrbytes; + void *bytes; + void *address; + Uint bitoffs; + Uint bitsize; + Eterm trampolines; + Eterm *hp; + + if (is_not_binary(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + nrbytes = binary_size(BIF_ARG_1); + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + ASSERT(bitoffs == 0); + ASSERT(bitsize == 0); + trampolines = NIL; +#ifdef HIPE_ALLOC_CODE + address = HIPE_ALLOC_CODE(nrbytes, BIF_ARG_2, &trampolines, BIF_P); + if (!address) + BIF_ERROR(BIF_P, BADARG); +#else + if (is_not_nil(BIF_ARG_2)) + BIF_ERROR(BIF_P, BADARG); + address = erts_alloc(ERTS_ALC_T_HIPE, nrbytes); +#endif + memcpy(address, bytes, nrbytes); + /* make the freshly copied code visible to instruction fetch */ + hipe_flush_icache_range(address, nrbytes); + hp = HAlloc(BIF_P, 3); + hp[0] = make_arityval(2); + hp[1] = address_to_term(address, BIF_P); + hp[2] = trampolines; + BIF_RET(make_tuple(hp)); +} + +/* + * Allocate memory for arbitrary non-Erlang data. 
+ */ +BIF_RETTYPE hipe_bifs_alloc_data_2(BIF_ALIST_2) +{ + Uint align, nrbytes; + void *block; + + if (is_not_small(BIF_ARG_1) || is_not_small(BIF_ARG_2) || + (align = unsigned_val(BIF_ARG_1), + align != sizeof(long) && align != sizeof(double))) + BIF_ERROR(BIF_P, BADARG); + nrbytes = unsigned_val(BIF_ARG_2); + block = erts_alloc(ERTS_ALC_T_HIPE, nrbytes); + if ((unsigned long)block & (align-1)) + fprintf(stderr, "Yikes! erts_alloc() returned misaligned address %p\r\n", block); + BIF_RET(address_to_term(block, BIF_P)); +} + +/* + * Memory area for constant Erlang terms. + * + * These constants must not be forwarded by the gc. + * Therefore, the gc needs to be able to distinguish between + * collectible objects and constants. Unfortunately, an Erlang + * process' collectible objects are scattered around in two + * heaps and a list of message buffers, so testing "is X a + * collectible object?" can be expensive. + * + * Instead, constants are placed in a single contiguous area, + * which allows for an inexpensive "is X a constant?" test. + * + * XXX: Allow this area to be grown. + */ + +/* not static, needed by garbage collector */ +Eterm *hipe_constants_start = NULL; +Eterm *hipe_constants_next = NULL; +static unsigned constants_avail_words = 0; +#define CONSTANTS_BYTES (1536*1024*sizeof(Eterm)) /* 1.5 M words */ + +static Eterm *constants_alloc(unsigned nwords) +{ + Eterm *next; + + /* initialise at the first call */ + if ((next = hipe_constants_next) == NULL) { + next = (Eterm*)erts_alloc(ERTS_ALC_T_HIPE, CONSTANTS_BYTES); + hipe_constants_start = next; + hipe_constants_next = next; + constants_avail_words = CONSTANTS_BYTES / sizeof(Eterm); + } + if (nwords > constants_avail_words) { + fprintf(stderr, "Native code constants pool depleted!\r\n"); + /* Must terminate immediately. erl_exit() seems to + continue running some code which then SIGSEGVs. 
*/ + exit(1); + } + constants_avail_words -= nwords; + hipe_constants_next = next + nwords; + return next; +} + +BIF_RETTYPE hipe_bifs_constants_size_0(BIF_ALIST_0) +{ + BIF_RET(make_small(hipe_constants_next - hipe_constants_start)); +} + +/* + * Merging constant Erlang terms. + * Uses the constants pool and a hash table of all top-level + * terms merged so far. (Sub-terms are not merged.) + */ +struct const_term { + HashBucket bucket; + Eterm val; /* tagged pointer to mem[0] */ + Eterm mem[1]; /* variable size */ +}; + +static Hash const_term_table; +static ErlOffHeap const_term_table_off_heap; + +static HashValue const_term_hash(void *tmpl) +{ + return make_hash2((Eterm)tmpl); +} + +static int const_term_cmp(void *tmpl, void *bucket) +{ + return !eq((Eterm)tmpl, ((struct const_term*)bucket)->val); +} + +static void *const_term_alloc(void *tmpl) +{ + Eterm obj; + Uint size; + Eterm *hp; + struct const_term *p; + + obj = (Eterm)tmpl; + ASSERT(is_not_immed(obj)); + size = size_object(obj); + + p = (struct const_term*)constants_alloc(size + (offsetof(struct const_term, mem)/sizeof(Eterm))); + + /* I have absolutely no idea if having a private 'off_heap' + works or not. _Some_ off_heap object is required for + REFC_BINARY and FUN values, but _where_ it should be is + a complete mystery to me. 
*/ + hp = &p->mem[0]; + p->val = copy_struct(obj, size, &hp, &const_term_table_off_heap); + + return &p->bucket; +} + +static void init_const_term_table(void) +{ + HashFunctions f; + f.hash = (H_FUN) const_term_hash; + f.cmp = (HCMP_FUN) const_term_cmp; + f.alloc = (HALLOC_FUN) const_term_alloc; + f.free = (HFREE_FUN) NULL; + hash_init(ERTS_ALC_T_HIPE, &const_term_table, "const_term_table", 97, f); +} + +BIF_RETTYPE hipe_bifs_merge_term_1(BIF_ALIST_1) +{ + static int init_done = 0; + struct const_term *p; + Eterm val; + + val = BIF_ARG_1; + if (is_not_immed(val)) { + if (!init_done) { + init_const_term_table(); + init_done = 1; + } + p = (struct const_term*)hash_put(&const_term_table, (void*)val); + val = p->val; + } + BIF_RET(val); +} + +struct mfa { + Eterm mod; + Eterm fun; + Uint ari; +}; + +static int term_to_mfa(Eterm term, struct mfa *mfa) +{ + Eterm mod, fun, a; + Uint ari; + + if (is_not_tuple(term)) + return 0; + if (tuple_val(term)[0] != make_arityval(3)) + return 0; + mod = tuple_val(term)[1]; + if (is_not_atom(mod)) + return 0; + mfa->mod = mod; + fun = tuple_val(term)[2]; + if (is_not_atom(fun)) + return 0; + mfa->fun = fun; + a = tuple_val(term)[3]; + if (is_not_small(a)) + return 0; + ari = unsigned_val(a); + if (ari > 255) + return 0; + mfa->ari = ari; + return 1; +} + +#ifdef DEBUG_LINKER +static void print_mfa(Eterm mod, Eterm fun, unsigned int ari) +{ + erts_printf("%T:%T/%u", mod, fun, ari); +} +#endif + +/* + * Convert {M,F,A} to pointer to first insn after initial func_info. 
+ */ +static Uint *hipe_find_emu_address(Eterm mod, Eterm name, unsigned int arity) +{ + Module *modp; + Uint *code_base; + int i, n; + + modp = erts_get_module(mod); + if (modp == NULL || (code_base = modp->code) == NULL) + return NULL; + n = code_base[MI_NUM_FUNCTIONS]; + for (i = 0; i < n; ++i) { + Uint *code_ptr = (Uint*)code_base[MI_FUNCTIONS+i]; + ASSERT(code_ptr[0] == BeamOpCode(op_i_func_info_IaaI)); + if (code_ptr[3] == name && code_ptr[4] == arity) + return code_ptr+5; + } + return NULL; +} + +Uint *hipe_bifs_find_pc_from_mfa(Eterm term) +{ + struct mfa mfa; + + if (!term_to_mfa(term, &mfa)) + return NULL; + return hipe_find_emu_address(mfa.mod, mfa.fun, mfa.ari); +} + +BIF_RETTYPE hipe_bifs_fun_to_address_1(BIF_ALIST_1) +{ + Eterm *pc = hipe_bifs_find_pc_from_mfa(BIF_ARG_1); + if (!pc) + BIF_ERROR(BIF_P, BADARG); + BIF_RET(address_to_term(pc, BIF_P)); +} + +static void *hipe_get_emu_address(Eterm m, Eterm f, unsigned int arity, int is_remote) +{ + void *address = NULL; + if (!is_remote) + address = hipe_find_emu_address(m, f, arity); + if (!address) { + /* if not found, stub it via the export entry */ + Export *export_entry = erts_export_get_or_make_stub(m, f, arity); + address = export_entry->address; + } + return address; +} + +#if 0 /* XXX: unused */ +BIF_RETTYPE hipe_bifs_get_emu_address_1(BIF_ALIST_1) +{ + struct mfa mfa; + void *address; + + if (!term_to_mfa(BIF_ARG_1, &mfa)) + BIF_ERROR(BIF_P, BADARG); + address = hipe_get_emu_address(mfa.mod, mfa.fun, mfa.ari); + BIF_RET(address_to_term(address, BIF_P)); +} +#endif + +BIF_RETTYPE hipe_bifs_set_native_address_3(BIF_ALIST_3) +{ + Eterm *pc; + void *address; + int is_closure; + struct mfa mfa; + + switch (BIF_ARG_3) { + case am_false: + is_closure = 0; + break; + case am_true: + is_closure = 1; + break; + default: + BIF_ERROR(BIF_P, BADARG); + } + address = term_to_address(BIF_ARG_2); + if (!address) + BIF_ERROR(BIF_P, BADARG); + + /* The mfa is needed again later, otherwise we could + simply have 
called hipe_bifs_find_pc_from_mfa(). */ + if (!term_to_mfa(BIF_ARG_1, &mfa)) + BIF_ERROR(BIF_P, BADARG); + pc = hipe_find_emu_address(mfa.mod, mfa.fun, mfa.ari); + + if (pc) { + hipe_mfa_save_orig_beam_op(mfa.mod, mfa.fun, mfa.ari, pc); +#if HIPE +#ifdef DEBUG_LINKER + printf("%s: ", __FUNCTION__); + print_mfa(mfa.mod, mfa.fun, mfa.ari); + printf(": planting call trap to %p at BEAM pc %p\r\n", address, pc); +#endif + hipe_set_call_trap(pc, address, is_closure); + BIF_RET(am_true); +#endif + } +#ifdef DEBUG_LINKER + printf("%s: ", __FUNCTION__); + print_mfa(mfa.mod, mfa.fun, mfa.ari); + printf(": no BEAM pc found\r\n"); +#endif + BIF_RET(am_false); +} + +#if 0 /* XXX: unused */ +/* + * hipe_bifs_address_to_fun(Address) + * - Address is the address of the start of a emu function's code + * - returns {Module, Function, Arity} + */ +BIF_RETTYPE hipe_bifs_address_to_fun_1(BIF_ALIST_1) +{ + Eterm *pc; + Eterm *funcinfo; + Eterm *hp; + + pc = term_to_address(BIF_ARG_1); + if (!pc) + BIF_ERROR(BIF_P, BADARG); + funcinfo = find_function_from_pc(pc); + if (!funcinfo) + BIF_RET(am_false); + hp = HAlloc(BIF_P, 4); + hp[0] = make_arityval(3); + hp[1] = funcinfo[0]; + hp[2] = funcinfo[1]; + hp[3] = make_small(funcinfo[2]); + BIF_RET(make_tuple(hp)); +} +#endif + +BIF_RETTYPE hipe_bifs_enter_sdesc_1(BIF_ALIST_1) +{ + struct sdesc *sdesc; + + sdesc = hipe_decode_sdesc(BIF_ARG_1); + if (!sdesc) { + fprintf(stderr, "%s: bad sdesc!\r\n", __FUNCTION__); + BIF_ERROR(BIF_P, BADARG); + } + if (hipe_put_sdesc(sdesc) != sdesc) { + fprintf(stderr, "%s: duplicate entry!\r\n", __FUNCTION__); + BIF_ERROR(BIF_P, BADARG); + } + BIF_RET(NIL); +} + +/* + * Hash table mapping {M,F,A} to nbif address. 
+ */ +struct nbif { + HashBucket bucket; + Eterm mod; + Eterm fun; + unsigned arity; + const void *address; +}; + +static struct nbif nbifs[BIF_SIZE] = { +#define BIF_LIST(MOD,FUN,ARY,CFUN,IX) \ + { {0,0}, MOD, FUN, ARY, &nbif_##CFUN }, +#include "erl_bif_list.h" +#undef BIF_LIST +}; + +#define NBIF_HASH(m,f,a) ((m)*(f)+(a)) +static Hash nbif_table; + +static HashValue nbif_hash(struct nbif *x) +{ + return NBIF_HASH(x->mod, x->fun, x->arity); +} + +static int nbif_cmp(struct nbif *x, struct nbif *y) +{ + return !(x->mod == y->mod && x->fun == y->fun && x->arity == y->arity); +} + +static struct nbif *nbif_alloc(struct nbif *x) +{ + return x; /* pre-allocated */ +} + +static void init_nbif_table(void) +{ + HashFunctions f; + int i; + + f.hash = (H_FUN) nbif_hash; + f.cmp = (HCMP_FUN) nbif_cmp; + f.alloc = (HALLOC_FUN) nbif_alloc; + f.free = NULL; + + hash_init(ERTS_ALC_T_NBIF_TABLE, &nbif_table, "nbif_table", 500, f); + + for (i = 0; i < BIF_SIZE; ++i) + hash_put(&nbif_table, &nbifs[i]); +} + +static const void *nbif_address(Eterm mod, Eterm fun, unsigned arity) +{ + struct nbif tmpl; + struct nbif *nbif; + + tmpl.mod = mod; + tmpl.fun = fun; + tmpl.arity = arity; + + nbif = hash_get(&nbif_table, &tmpl); + return nbif ? nbif->address : NULL; +} + +/* + * hipe_bifs_bif_address(M,F,A) -> address or false + */ +BIF_RETTYPE hipe_bifs_bif_address_3(BIF_ALIST_3) +{ + const void *address; + static int init_done = 0; + + if (!init_done) { + init_nbif_table(); + init_done = 1; + } + + if (is_not_atom(BIF_ARG_1) || + is_not_atom(BIF_ARG_2) || + is_not_small(BIF_ARG_3) || + signed_val(BIF_ARG_3) < 0) + BIF_RET(am_false); + + address = nbif_address(BIF_ARG_1, BIF_ARG_2, unsigned_val(BIF_ARG_3)); + if (address) + BIF_RET(address_to_term(address, BIF_P)); + BIF_RET(am_false); +} + +/* + * Hash table mapping primops to their addresses. 
+ */ +struct primop { + HashBucket bucket; /* bucket.hvalue == atom_val(name) */ + const void *address; +#if defined(__arm__) + void *trampoline; +#endif +}; + +static struct primop primops[] = { +#define PRIMOP_LIST(ATOM,ADDRESS) { {0,_unchecked_atom_val(ATOM)}, ADDRESS }, +#include "hipe_primops.h" +#undef PRIMOP_LIST +}; + +static Hash primop_table; + +static HashValue primop_hash(void *tmpl) +{ + return ((struct primop*)tmpl)->bucket.hvalue; /* pre-initialised */ +} + +static int primop_cmp(void *tmpl, void *bucket) +{ + return 0; /* hvalue matched so nothing further to do */ +} + +static void *primop_alloc(void *tmpl) +{ + return tmpl; /* pre-allocated */ +} + +static void init_primop_table(void) +{ + HashFunctions f; + int i; + static int init_done = 0; + + if (init_done) + return; + init_done = 1; + + f.hash = (H_FUN) primop_hash; + f.cmp = (HCMP_FUN) primop_cmp; + f.alloc = (HALLOC_FUN) primop_alloc; + f.free = NULL; + + hash_init(ERTS_ALC_T_HIPE, &primop_table, "primop_table", 50, f); + + for (i = 0; i < sizeof(primops)/sizeof(primops[0]); ++i) + hash_put(&primop_table, &primops[i]); +} + +static struct primop *primop_table_get(Eterm name) +{ + struct primop tmpl; + + init_primop_table(); + tmpl.bucket.hvalue = atom_val(name); + return hash_get(&primop_table, &tmpl); +} + +#if defined(__arm__) +static struct primop *primop_table_put(Eterm name) +{ + struct primop tmpl; + + init_primop_table(); + tmpl.bucket.hvalue = atom_val(name); + return hash_put(&primop_table, &tmpl); +} + +void *hipe_primop_get_trampoline(Eterm name) +{ + struct primop *primop = primop_table_get(name); + return primop ? 
primop->trampoline : NULL; +} + +void hipe_primop_set_trampoline(Eterm name, void *trampoline) +{ + struct primop *primop = primop_table_put(name); + primop->trampoline = trampoline; +} +#endif + +/* + * hipe_bifs_primop_address(Atom) -> address or false + */ +BIF_RETTYPE hipe_bifs_primop_address_1(BIF_ALIST_1) +{ + const struct primop *primop; + + if (is_not_atom(BIF_ARG_1)) + BIF_RET(am_false); + primop = primop_table_get(BIF_ARG_1); + if (!primop) + BIF_RET(am_false); + BIF_RET(address_to_term(primop->address, BIF_P)); +} + +#if 0 /* XXX: unused */ +/* + * hipe_bifs_gbif_address(F,A) -> address or false + */ +#define GBIF_LIST(ATOM,ARY,CFUN) extern Eterm gbif_##CFUN(void); +#include "hipe_gbif_list.h" +#undef GBIF_LIST + +BIF_RETTYPE hipe_bifs_gbif_address_2(BIF_ALIST_2) +{ + Uint arity; + void *address; + + if (is_not_atom(BIF_ARG_1) || is_not_small(BIF_ARG_2)) + BIF_RET(am_false); /* error or false, does it matter? */ + arity = signed_val(BIF_ARG_2); + /* XXX: replace with a hash table later */ + do { /* trick to let us use 'break' instead of 'goto' */ +#define GBIF_LIST(ATOM,ARY,CFUN) if (BIF_ARG_1 == ATOM && arity == ARY) { address = CFUN; break; } +#include "hipe_gbif_list.h" +#undef GBIF_LIST + printf("\r\n%s: guard BIF ", __FUNCTION__); + fflush(stdout); + erts_printf("%T", BIF_ARG_1); + printf("/%lu isn't listed in hipe_gbif_list.h\r\n", arity); + BIF_RET(am_false); + } while (0); + BIF_RET(address_to_term(address, BIF_P)); +} +#endif + +BIF_RETTYPE hipe_bifs_atom_to_word_1(BIF_ALIST_1) +{ + if (is_not_atom(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + BIF_RET(Uint_to_term(BIF_ARG_1, BIF_P)); +} + +BIF_RETTYPE hipe_bifs_term_to_word_1(BIF_ALIST_1) +{ + BIF_RET(Uint_to_term(BIF_ARG_1, BIF_P)); +} + +/* XXX: this is really a primop, not a BIF */ +BIF_RETTYPE hipe_conv_big_to_float(BIF_ALIST_1) +{ + Eterm res; + Eterm *hp; + FloatDef f; + + if (is_not_big(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + if (big_to_double(BIF_ARG_1, &f.fd) < 0) + BIF_ERROR(BIF_P, 
BADARG); + hp = HAlloc(BIF_P, FLOAT_SIZE_OBJECT); + res = make_float(hp); + PUT_DOUBLE(f, hp); + BIF_RET(res); +} + +#if 0 /* XXX: unused */ +/* + * At least parts of this should be inlined in native code. + * The rest could be made a primop used by both the emulator and + * native code... + */ +BIF_RETTYPE hipe_bifs_make_fun_3(BIF_ALIST_3) +{ + Eterm free_vars; + Eterm mod; + Eterm *tp; + Uint index; + Uint uniq; + Uint num_free; + Eterm tmp_var; + Uint *tmp_ptr; + unsigned needed; + ErlFunThing *funp; + Eterm *hp; + int i; + + if (is_not_list(BIF_ARG_1) && is_not_nil(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + free_vars = BIF_ARG_1; + + if (is_not_atom(BIF_ARG_2)) + BIF_ERROR(BIF_P, BADARG); + mod = BIF_ARG_2; + + if (is_not_tuple(BIF_ARG_3) || + (arityval(*tuple_val(BIF_ARG_3)) != 3)) + BIF_ERROR(BIF_P, BADARG); + tp = tuple_val(BIF_ARG_3); + + if (term_to_Uint(tp[1], &index) == 0) + BIF_ERROR(BIF_P, BADARG); + if (term_to_Uint(tp[2], &uniq) == 0) + BIF_ERROR(BIF_P, BADARG); + if (term_to_Uint(tp[3], &num_free) == 0) + BIF_ERROR(BIF_P, BADARG); + + needed = ERL_FUN_SIZE + num_free; + funp = (ErlFunThing *) HAlloc(BIF_P, needed); + hp = funp->env; + + funp->thing_word = HEADER_FUN; + + /* Need a ErlFunEntry *fe + * fe->refc++; + * funp->fe = fe; + */ + + funp->num_free = num_free; + funp->creator = BIF_P->id; + for (i = 0; i < num_free; i++) { + if (is_nil(free_vars)) + BIF_ERROR(BIF_P, BADARG); + tmp_ptr = list_val(free_vars); + tmp_var = CAR(tmp_ptr); + free_vars = CDR(tmp_ptr); + *hp++ = tmp_var; + } + if (is_not_nil(free_vars)) + BIF_ERROR(BIF_P, BADARG); + +#ifndef HYBRID /* FIND ME! 
*/ + funp->next = MSO(BIF_P).funs; + MSO(BIF_P).funs = funp; +#endif + + BIF_RET(make_fun(funp)); +} +#endif + +/* + * args: Nativecodeaddress, Module, {Uniq, Index, BeamAddress} + */ +BIF_RETTYPE hipe_bifs_make_fe_3(BIF_ALIST_3) +{ + Eterm mod; + Uint index; + Uint uniq; + void *beam_address; + ErlFunEntry *fe; + Eterm *tp; + void *native_address; + + native_address = term_to_address(BIF_ARG_1); + if (!native_address) + BIF_ERROR(BIF_P, BADARG); + + if (is_not_atom(BIF_ARG_2)) + BIF_ERROR(BIF_P, BADARG); + mod = BIF_ARG_2; + + if (is_not_tuple(BIF_ARG_3) || + (arityval(*tuple_val(BIF_ARG_3)) != 3)) + BIF_ERROR(BIF_P, BADARG); + tp = tuple_val(BIF_ARG_3); + if (term_to_Uint(tp[1], &uniq) == 0) + BIF_ERROR(BIF_P, BADARG); + if (term_to_Uint(tp[2], &index) == 0) + BIF_ERROR(BIF_P, BADARG); + + beam_address = term_to_address(tp[3]); + if (!beam_address) + BIF_ERROR(BIF_P, BADARG); + + fe = erts_get_fun_entry(mod, uniq, index); + if (fe == NULL) { + int i = atom_val(mod); + char atom_buf[256]; + + atom_buf[0] = '\0'; + strncat(atom_buf, (char*)atom_tab(i)->name, atom_tab(i)->len); + printf("no fun entry for %s %ld:%ld\n", atom_buf, uniq, index); + BIF_ERROR(BIF_P, BADARG); + } + fe->native_address = native_address; + if (erts_refc_dectest(&fe->refc, 0) == 0) + erts_erase_fun_entry(fe); + BIF_RET(address_to_term((void *)fe, BIF_P)); +} + +#if 0 /* XXX: unused */ +BIF_RETTYPE hipe_bifs_make_native_stub_2(BIF_ALIST_2) +{ + void *beamAddress; + Uint beamArity; + void *stubAddress; + + if ((beamAddress = term_to_address(BIF_ARG_1)) == 0 || + is_not_small(BIF_ARG_2) || + (beamArity = unsigned_val(BIF_ARG_2)) >= 256) + BIF_ERROR(BIF_P, BADARG); + stubAddress = hipe_make_native_stub(beamAddress, beamArity); + BIF_RET(address_to_term(stubAddress, BIF_P)); +} +#endif + +/* + * MFA info hash table: + * - maps MFA to native code entry point + * - the MFAs it calls (refers_to) + * - the references to it (referred_from) + * - maps MFA to most recent trampoline [if powerpc or arm] + 
*/ +struct hipe_mfa_info { + struct { + unsigned long hvalue; + struct hipe_mfa_info *next; + } bucket; + Eterm m; /* atom */ + Eterm f; /* atom */ + unsigned int a; + void *remote_address; + void *local_address; + Eterm *beam_code; + Uint orig_beam_op; + struct hipe_mfa_info_list *refers_to; + struct ref *referred_from; +#if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__arm__) + void *trampoline; +#endif +}; + +static struct { + unsigned int log2size; + unsigned int mask; /* INV: mask == (1 << log2size)-1 */ + unsigned int used; + struct hipe_mfa_info **bucket; +} hipe_mfa_info_table; + +#define HIPE_MFA_HASH(M,F,A) ((M) * (F) + (A)) + +static struct hipe_mfa_info **hipe_mfa_info_table_alloc_bucket(unsigned int size) +{ + unsigned long nbytes = size * sizeof(struct hipe_mfa_info*); + struct hipe_mfa_info **bucket = erts_alloc(ERTS_ALC_T_HIPE, nbytes); + sys_memzero(bucket, nbytes); + return bucket; +} + +static void hipe_mfa_info_table_grow(void) +{ + unsigned int old_size, new_size, new_mask; + struct hipe_mfa_info **old_bucket, **new_bucket; + unsigned int i; + + old_size = 1 << hipe_mfa_info_table.log2size; + hipe_mfa_info_table.log2size += 1; + new_size = 1 << hipe_mfa_info_table.log2size; + new_mask = new_size - 1; + hipe_mfa_info_table.mask = new_mask; + old_bucket = hipe_mfa_info_table.bucket; + new_bucket = hipe_mfa_info_table_alloc_bucket(new_size); + hipe_mfa_info_table.bucket = new_bucket; + for (i = 0; i < old_size; ++i) { + struct hipe_mfa_info *b = old_bucket[i]; + while (b != NULL) { + struct hipe_mfa_info *next = b->bucket.next; + unsigned int j = b->bucket.hvalue & new_mask; + b->bucket.next = new_bucket[j]; + new_bucket[j] = b; + b = next; + } + } + erts_free(ERTS_ALC_T_HIPE, old_bucket); +} + +static struct hipe_mfa_info *hipe_mfa_info_table_alloc(Eterm m, Eterm f, unsigned int arity) +{ + struct hipe_mfa_info *res; + + res = (struct hipe_mfa_info*)erts_alloc(ERTS_ALC_T_HIPE, sizeof(*res)); + res->m = m; + 
res->f = f; + res->a = arity; + res->remote_address = NULL; + res->local_address = NULL; + res->beam_code = NULL; + res->orig_beam_op = 0; + res->refers_to = NULL; + res->referred_from = NULL; +#if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__arm__) + res->trampoline = NULL; +#endif + + return res; +} + +void hipe_mfa_info_table_init(void) +{ + unsigned int log2size, size; + + log2size = 10; + size = 1 << log2size; + hipe_mfa_info_table.log2size = log2size; + hipe_mfa_info_table.mask = size - 1; + hipe_mfa_info_table.used = 0; + hipe_mfa_info_table.bucket = hipe_mfa_info_table_alloc_bucket(size); +} + +static inline struct hipe_mfa_info *hipe_mfa_info_table_get(Eterm m, Eterm f, unsigned int arity) +{ + unsigned long h; + unsigned int i; + struct hipe_mfa_info *p; + + h = HIPE_MFA_HASH(m, f, arity); + i = h & hipe_mfa_info_table.mask; + p = hipe_mfa_info_table.bucket[i]; + for (; p; p = p->bucket.next) + /* XXX: do we want to compare p->bucket.hvalue as well? */ + if (p->m == m && p->f == f && p->a == arity) + return p; + return NULL; +} + +#if 0 /* XXX: unused */ +void *hipe_mfa_find_na(Eterm m, Eterm f, unsigned int arity) +{ + const struct hipe_mfa_info *p; + + p = hipe_mfa_info_table_get(m, f, arity); + return p ? p->address : NULL; +} +#endif + +static struct hipe_mfa_info *hipe_mfa_info_table_put(Eterm m, Eterm f, unsigned int arity) +{ + unsigned long h; + unsigned int i; + struct hipe_mfa_info *p; + unsigned int size; + + h = HIPE_MFA_HASH(m, f, arity); + i = h & hipe_mfa_info_table.mask; + p = hipe_mfa_info_table.bucket[i]; + for (; p; p = p->bucket.next) + /* XXX: do we want to compare p->bucket.hvalue as well? 
*/ + if (p->m == m && p->f == f && p->a == arity) + return p; + p = hipe_mfa_info_table_alloc(m, f, arity); + p->bucket.hvalue = h; + p->bucket.next = hipe_mfa_info_table.bucket[i]; + hipe_mfa_info_table.bucket[i] = p; + hipe_mfa_info_table.used += 1; + size = 1 << hipe_mfa_info_table.log2size; + if (hipe_mfa_info_table.used > (4*size/5)) /* rehash at 80% */ + hipe_mfa_info_table_grow(); + return p; +} + +static void hipe_mfa_set_na(Eterm m, Eterm f, unsigned int arity, void *address, int is_exported) +{ + struct hipe_mfa_info *p = hipe_mfa_info_table_put(m, f, arity); +#ifdef DEBUG_LINKER + printf("%s: ", __FUNCTION__); + print_mfa(m, f, arity); + printf(": changing address from %p to %p\r\n", p->local_address, address); +#endif + p->local_address = address; + if (is_exported) + p->remote_address = address; +} + +#if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__arm__) +void *hipe_mfa_get_trampoline(Eterm m, Eterm f, unsigned int arity) +{ + struct hipe_mfa_info *p = hipe_mfa_info_table_put(m, f, arity); + return p->trampoline; +} + +void hipe_mfa_set_trampoline(Eterm m, Eterm f, unsigned int arity, void *trampoline) +{ + struct hipe_mfa_info *p = hipe_mfa_info_table_put(m, f, arity); + p->trampoline = trampoline; +} +#endif + +BIF_RETTYPE hipe_bifs_set_funinfo_native_address_3(BIF_ALIST_3) +{ + struct mfa mfa; + void *address; + int is_exported; + + if (!term_to_mfa(BIF_ARG_1, &mfa)) + BIF_ERROR(BIF_P, BADARG); + address = term_to_address(BIF_ARG_2); + if (!address) + BIF_ERROR(BIF_P, BADARG); + if (BIF_ARG_3 == am_true) + is_exported = 1; + else if (BIF_ARG_3 == am_false) + is_exported = 0; + else + BIF_ERROR(BIF_P, BADARG); + hipe_mfa_set_na(mfa.mod, mfa.fun, mfa.ari, address, is_exported); + BIF_RET(NIL); +} + +BIF_RETTYPE hipe_bifs_invalidate_funinfo_native_addresses_1(BIF_ALIST_1) +{ + Eterm lst; + struct mfa mfa; + struct hipe_mfa_info *p; + + lst = BIF_ARG_1; + while (is_list(lst)) { + if (!term_to_mfa(CAR(list_val(lst)), 
&mfa)) + BIF_ERROR(BIF_P, BADARG); + lst = CDR(list_val(lst)); + p = hipe_mfa_info_table_get(mfa.mod, mfa.fun, mfa.ari); + if (p) { + p->remote_address = NULL; + p->local_address = NULL; + if (p->beam_code) { +#ifdef DEBUG_LINKER + printf("%s: ", __FUNCTION__); + print_mfa(mfa.mod, mfa.fun, mfa.ari); + printf(": removing call trap from BEAM pc %p (new op %#lx)\r\n", + p->beam_code, p->orig_beam_op); +#endif + p->beam_code[0] = p->orig_beam_op; + p->beam_code = NULL; + p->orig_beam_op = 0; + } else { +#ifdef DEBUG_LINKER + printf("%s: ", __FUNCTION__); + print_mfa(mfa.mod, mfa.fun, mfa.ari); + printf(": no call trap to remove\r\n"); +#endif + } + } + } + if (is_not_nil(lst)) + BIF_ERROR(BIF_P, BADARG); + BIF_RET(NIL); +} + +void hipe_mfa_save_orig_beam_op(Eterm mod, Eterm fun, unsigned int ari, Eterm *pc) +{ + Uint orig_beam_op; + struct hipe_mfa_info *p; + + orig_beam_op = pc[0]; + if (orig_beam_op != BeamOpCode(op_hipe_trap_call_closure) && + orig_beam_op != BeamOpCode(op_hipe_trap_call)) { + p = hipe_mfa_info_table_put(mod, fun, ari); +#ifdef DEBUG_LINKER + printf("%s: ", __FUNCTION__); + print_mfa(mod, fun, ari); + printf(": saving orig op %#lx from BEAM pc %p\r\n", orig_beam_op, pc); +#endif + p->beam_code = pc; + p->orig_beam_op = orig_beam_op; + } else { +#ifdef DEBUG_LINKER + printf("%s: ", __FUNCTION__); + print_mfa(mod, fun, ari); + printf(": orig op %#lx already saved\r\n", orig_beam_op); +#endif + } +} + +static void *hipe_make_stub(Eterm m, Eterm f, unsigned int arity, int is_remote) +{ + void *BEAMAddress; + void *StubAddress; + +#if 0 + if (is_not_atom(m) || is_not_atom(f) || arity > 255) + return NULL; +#endif + BEAMAddress = hipe_get_emu_address(m, f, arity, is_remote); + StubAddress = hipe_make_native_stub(BEAMAddress, arity); +#if 0 + hipe_mfa_set_na(m, f, arity, StubAddress); +#endif + return StubAddress; +} + +static void *hipe_get_na_nofail(Eterm m, Eterm f, unsigned int a, int is_remote) +{ + struct hipe_mfa_info *p; + void *address; + + p = 
hipe_mfa_info_table_get(m, f, a); + if (p) { + /* find address, predicting for a runtime apply call */ + address = p->remote_address; + if (!is_remote) + address = p->local_address; + if (address) + return address; + + /* bummer, install stub, checking if one already existed */ + address = p->remote_address; + if (address) + return address; + } else + p = hipe_mfa_info_table_put(m, f, a); + address = hipe_make_stub(m, f, a, is_remote); + /* XXX: how to tell if a BEAM MFA is exported or not? */ + p->remote_address = address; + return address; +} + +/* used for apply/3 in hipe_mode_switch */ +void *hipe_get_remote_na(Eterm m, Eterm f, unsigned int a) +{ + if (is_not_atom(m) || is_not_atom(f) || a > 255) + return NULL; + return hipe_get_na_nofail(m, f, a, 1); +} + +/* primop, but called like a BIF for error handling purposes */ +BIF_RETTYPE hipe_find_na_or_make_stub(BIF_ALIST_3) +{ + Uint arity; + void *address; + + if (is_not_atom(BIF_ARG_1) || is_not_atom(BIF_ARG_2)) + BIF_ERROR(BIF_P, BADARG); + arity = unsigned_val(BIF_ARG_3); /* no error check */ + address = hipe_get_na_nofail(BIF_ARG_1, BIF_ARG_2, arity, 1); + BIF_RET((Eterm)address); /* semi-Ok */ +} + +BIF_RETTYPE hipe_bifs_find_na_or_make_stub_2(BIF_ALIST_2) +{ + struct mfa mfa; + void *address; + int is_remote; + + if (!term_to_mfa(BIF_ARG_1, &mfa)) + BIF_ERROR(BIF_P, BADARG); + if (BIF_ARG_2 == am_true) + is_remote = 1; + else if (BIF_ARG_2 == am_false) + is_remote = 0; + else + BIF_ERROR(BIF_P, BADARG); + address = hipe_get_na_nofail(mfa.mod, mfa.fun, mfa.ari, is_remote); + BIF_RET(address_to_term(address, BIF_P)); +} + +/* primop, but called like a BIF for error handling purposes */ +BIF_RETTYPE hipe_nonclosure_address(BIF_ALIST_2) +{ + Eterm hdr, m, f; + void *address; + + if (!is_boxed(BIF_ARG_1)) + goto badfun; + hdr = *boxed_val(BIF_ARG_1); + if (is_export_header(hdr)) { + Export *ep = (Export*)(export_val(BIF_ARG_1)[1]); + unsigned int actual_arity = ep->code[2]; + if (actual_arity != BIF_ARG_2) + 
goto badfun; + m = ep->code[0]; + f = ep->code[1]; + } else if (hdr == make_arityval(2)) { + Eterm *tp = tuple_val(BIF_ARG_1); + m = tp[1]; + f = tp[2]; + if (is_not_atom(m) || is_not_atom(f)) + goto badfun; + if (!erts_find_export_entry(m, f, BIF_ARG_2)) + goto badfun; + } else + goto badfun; + address = hipe_get_na_nofail(m, f, BIF_ARG_2, 1); + BIF_RET((Eterm)address); + + badfun: + BIF_P->current = NULL; + BIF_P->fvalue = BIF_ARG_1; + BIF_ERROR(BIF_P, EXC_BADFUN); +} + +int hipe_find_mfa_from_ra(const void *ra, Eterm *m, Eterm *f, unsigned int *a) +{ + struct hipe_mfa_info *mfa; + long mfa_offset, ra_offset; + struct hipe_mfa_info **bucket; + unsigned int i, nrbuckets; + + /* Note about locking: the table is only updated from the + loader, which runs with the rest of the system suspended. */ + bucket = hipe_mfa_info_table.bucket; + nrbuckets = 1 << hipe_mfa_info_table.log2size; + mfa = NULL; + mfa_offset = LONG_MAX; + for (i = 0; i < nrbuckets; ++i) { + struct hipe_mfa_info *b = bucket[i]; + while (b != NULL) { + ra_offset = (char*)ra - (char*)b->local_address; + if (ra_offset > 0 && ra_offset < mfa_offset) { + mfa_offset = ra_offset; + mfa = b; + } + b = b->bucket.next; + } + } + if (!mfa) + return 0; + *m = mfa->m; + *f = mfa->f; + *a = mfa->a; + return 1; +} + +/* + * Patch Reference Handling. 
+ */ +struct hipe_mfa_info_list { + struct hipe_mfa_info *mfa; + struct hipe_mfa_info_list *next; +}; + +struct ref { + struct hipe_mfa_info *caller_mfa; + void *address; + void *trampoline; + unsigned int flags; + struct ref *next; +}; +#define REF_FLAG_IS_LOAD_MFA 1 /* bit 0: 0 == call, 1 == load_mfa */ +#define REF_FLAG_IS_REMOTE 2 /* bit 1: 0 == local, 1 == remote */ +#define REF_FLAG_PENDING_REDIRECT 4 /* bit 2: 1 == pending redirect */ +#define REF_FLAG_PENDING_REMOVE 8 /* bit 3: 1 == pending remove */ + +/* add_ref(CalleeMFA, {CallerMFA,Address,'call'|'load_mfa',Trampoline,'remote'|'local'}) + */ +BIF_RETTYPE hipe_bifs_add_ref_2(BIF_ALIST_2) +{ + struct mfa callee; + Eterm *tuple; + struct mfa caller; + void *address; + void *trampoline; + unsigned int flags; + struct hipe_mfa_info *callee_mfa; + struct hipe_mfa_info *caller_mfa; + struct hipe_mfa_info_list *refers_to; + struct ref *ref; + + if (!term_to_mfa(BIF_ARG_1, &callee)) + goto badarg; + if (is_not_tuple(BIF_ARG_2)) + goto badarg; + tuple = tuple_val(BIF_ARG_2); + if (tuple[0] != make_arityval(5)) + goto badarg; + if (!term_to_mfa(tuple[1], &caller)) + goto badarg; + address = term_to_address(tuple[2]); + if (!address) + goto badarg; + switch (tuple[3]) { + case am_call: + flags = 0; + break; + case am_load_mfa: + flags = REF_FLAG_IS_LOAD_MFA; + break; + default: + goto badarg; + } + if (is_nil(tuple[4])) + trampoline = NULL; + else { + trampoline = term_to_address(tuple[4]); + if (!trampoline) + goto badarg; + } + switch (tuple[5]) { + case am_local: + break; + case am_remote: + flags |= REF_FLAG_IS_REMOTE; + break; + default: + goto badarg; + } + callee_mfa = hipe_mfa_info_table_put(callee.mod, callee.fun, callee.ari); + caller_mfa = hipe_mfa_info_table_put(caller.mod, caller.fun, caller.ari); + + refers_to = erts_alloc(ERTS_ALC_T_HIPE, sizeof(*refers_to)); + refers_to->mfa = callee_mfa; + refers_to->next = caller_mfa->refers_to; + caller_mfa->refers_to = refers_to; + + ref = 
erts_alloc(ERTS_ALC_T_HIPE, sizeof(*ref)); + ref->caller_mfa = caller_mfa; + ref->address = address; + ref->trampoline = trampoline; + ref->flags = flags; + ref->next = callee_mfa->referred_from; + callee_mfa->referred_from = ref; + + BIF_RET(NIL); + + badarg: + BIF_ERROR(BIF_P, BADARG); +} + +/* Given a CalleeMFA, mark each ref to it as pending-redirect. + * This ensures that remove_refs_from() won't remove them: any + * removal is instead done at the end of redirect_referred_from(). + */ +BIF_RETTYPE hipe_bifs_mark_referred_from_1(BIF_ALIST_1) /* get_refs_from */ +{ + struct mfa mfa; + const struct hipe_mfa_info *p; + struct ref *ref; + + if (!term_to_mfa(BIF_ARG_1, &mfa)) + BIF_ERROR(BIF_P, BADARG); + p = hipe_mfa_info_table_get(mfa.mod, mfa.fun, mfa.ari); + if (p) + for (ref = p->referred_from; ref != NULL; ref = ref->next) + ref->flags |= REF_FLAG_PENDING_REDIRECT; + BIF_RET(NIL); +} + +BIF_RETTYPE hipe_bifs_remove_refs_from_1(BIF_ALIST_1) +{ + struct mfa mfa; + struct hipe_mfa_info *caller_mfa, *callee_mfa; + struct hipe_mfa_info_list *refers_to, *tmp_refers_to; + struct ref **prev, *ref; + + if (!term_to_mfa(BIF_ARG_1, &mfa)) + BIF_ERROR(BIF_P, BADARG); + caller_mfa = hipe_mfa_info_table_get(mfa.mod, mfa.fun, mfa.ari); + if (caller_mfa) { + refers_to = caller_mfa->refers_to; + while (refers_to) { + callee_mfa = refers_to->mfa; + prev = &callee_mfa->referred_from; + ref = *prev; + while (ref) { + if (ref->caller_mfa == caller_mfa) { + if (ref->flags & REF_FLAG_PENDING_REDIRECT) { + ref->flags |= REF_FLAG_PENDING_REMOVE; + prev = &ref->next; + ref = ref->next; + } else { + struct ref *tmp = ref; + ref = ref->next; + *prev = ref; + erts_free(ERTS_ALC_T_HIPE, tmp); + } + } else { + prev = &ref->next; + ref = ref->next; + } + } + tmp_refers_to = refers_to; + refers_to = refers_to->next; + erts_free(ERTS_ALC_T_HIPE, tmp_refers_to); + } + caller_mfa->refers_to = NULL; + } + BIF_RET(NIL); +} + +/* redirect_referred_from(CalleeMFA) + * Redirect all pending-redirect 
refs in CalleeMFA's referred_from. + * Then remove any pending-redirect && pending-remove refs from CalleeMFA's referred_from. + */ +BIF_RETTYPE hipe_bifs_redirect_referred_from_1(BIF_ALIST_1) +{ + struct mfa mfa; + struct hipe_mfa_info *p; + struct ref **prev, *ref; + int is_remote, res; + void *new_address; + + if (!term_to_mfa(BIF_ARG_1, &mfa)) + BIF_ERROR(BIF_P, BADARG); + p = hipe_mfa_info_table_get(mfa.mod, mfa.fun, mfa.ari); + if (p) { + prev = &p->referred_from; + ref = *prev; + while (ref) { + if (ref->flags & REF_FLAG_PENDING_REDIRECT) { + is_remote = ref->flags & REF_FLAG_IS_REMOTE; + new_address = hipe_get_na_nofail(p->m, p->f, p->a, is_remote); + if (ref->flags & REF_FLAG_IS_LOAD_MFA) + res = hipe_patch_insn(ref->address, (Uint)new_address, am_load_mfa); + else + res = hipe_patch_call(ref->address, new_address, ref->trampoline); + if (res) + fprintf(stderr, "%s: patch failed\r\n", __FUNCTION__); + ref->flags &= ~REF_FLAG_PENDING_REDIRECT; + if (ref->flags & REF_FLAG_PENDING_REMOVE) { + struct ref *tmp = ref; + ref = ref->next; + *prev = ref; + erts_free(ERTS_ALC_T_HIPE, tmp); + } else { + prev = &ref->next; + ref = ref->next; + } + } else { + prev = &ref->next; + ref = ref->next; + } + } + } + BIF_RET(NIL); +} + +BIF_RETTYPE hipe_bifs_check_crc_1(BIF_ALIST_1) +{ + Uint crc; + + if (!term_to_Uint(BIF_ARG_1, &crc)) + BIF_ERROR(BIF_P, BADARG); + if (crc == HIPE_SYSTEM_CRC) + BIF_RET(am_true); + BIF_RET(am_false); +} + +BIF_RETTYPE hipe_bifs_system_crc_1(BIF_ALIST_1) +{ + Uint crc; + + if (!term_to_Uint(BIF_ARG_1, &crc)) + BIF_ERROR(BIF_P, BADARG); + crc ^= (HIPE_SYSTEM_CRC ^ HIPE_LITERALS_CRC); + BIF_RET(Uint_to_term(crc, BIF_P)); +} + +BIF_RETTYPE hipe_bifs_get_rts_param_1(BIF_ALIST_1) +{ + unsigned int is_defined; + unsigned long value; + + if (is_not_small(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + is_defined = 1; + value = 0; + switch (unsigned_val(BIF_ARG_1)) { + RTS_PARAMS_CASES + default: + BIF_ERROR(BIF_P, BADARG); + } + if (!is_defined) + 
BIF_RET(NIL); + BIF_RET(Uint_to_term(value, BIF_P)); +} + +void hipe_patch_address(Uint *address, Eterm patchtype, Uint value) +{ + switch (patchtype) { + case am_load_fe: + hipe_patch_load_fe(address, value); + return; + default: + fprintf(stderr, "%s: unknown patchtype %#lx\r\n", + __FUNCTION__, patchtype); + return; + } +} + +struct modinfo { + HashBucket bucket; /* bucket.hvalue == atom_val(the module name) */ + unsigned int code_size; +}; + +static Hash modinfo_table; + +static HashValue modinfo_hash(void *tmpl) +{ + Eterm mod = (Eterm)tmpl; + return atom_val(mod); +} + +static int modinfo_cmp(void *tmpl, void *bucket) +{ + /* bucket->hvalue == modinfo_hash(tmpl), so just return 0 (match) */ + return 0; +} + +static void *modinfo_alloc(void *tmpl) +{ + struct modinfo *p; + + p = (struct modinfo*)erts_alloc(ERTS_ALC_T_HIPE, sizeof(*p)); + p->code_size = 0; + return &p->bucket; +} + +static void init_modinfo_table(void) +{ + HashFunctions f; + static int init_done = 0; + + if (init_done) + return; + init_done = 1; + f.hash = (H_FUN) modinfo_hash; + f.cmp = (HCMP_FUN) modinfo_cmp; + f.alloc = (HALLOC_FUN) modinfo_alloc; + f.free = (HFREE_FUN) NULL; + hash_init(ERTS_ALC_T_HIPE, &modinfo_table, "modinfo_table", 11, f); +} + +BIF_RETTYPE hipe_bifs_update_code_size_3(BIF_ALIST_3) +{ + struct modinfo *p; + Sint code_size; + + init_modinfo_table(); + + if (is_not_atom(BIF_ARG_1) || + is_not_small(BIF_ARG_3) || + (code_size = signed_val(BIF_ARG_3)) < 0) + BIF_ERROR(BIF_P, BADARG); + + p = (struct modinfo*)hash_put(&modinfo_table, (void*)BIF_ARG_1); + + if (is_nil(BIF_ARG_2)) /* some MFAs, not whole module */ + p->code_size += code_size; + else /* whole module */ + p->code_size = code_size; + BIF_RET(NIL); +} + +BIF_RETTYPE hipe_bifs_code_size_1(BIF_ALIST_1) +{ + struct modinfo *p; + unsigned int code_size; + + init_modinfo_table(); + + if (is_not_atom(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + + p = (struct modinfo*)hash_get(&modinfo_table, (void*)BIF_ARG_1); + + 
code_size = p ? p->code_size : 0; + BIF_RET(make_small(code_size)); +} + +BIF_RETTYPE hipe_bifs_patch_insn_3(BIF_ALIST_3) +{ + Uint *address, value; + + address = term_to_address(BIF_ARG_1); + if (!address) + BIF_ERROR(BIF_P, BADARG); + if (!term_to_Uint(BIF_ARG_2, &value)) + BIF_ERROR(BIF_P, BADARG); + if (hipe_patch_insn(address, value, BIF_ARG_3)) + BIF_ERROR(BIF_P, BADARG); + BIF_RET(NIL); +} + +BIF_RETTYPE hipe_bifs_patch_call_3(BIF_ALIST_3) +{ + Uint *callAddress, *destAddress, *trampAddress; + + callAddress = term_to_address(BIF_ARG_1); + if (!callAddress) + BIF_ERROR(BIF_P, BADARG); + destAddress = term_to_address(BIF_ARG_2); + if (!destAddress) + BIF_ERROR(BIF_P, BADARG); + if (is_nil(BIF_ARG_3)) + trampAddress = NULL; + else { + trampAddress = term_to_address(BIF_ARG_3); + if (!trampAddress) + BIF_ERROR(BIF_P, BADARG); + } + if (hipe_patch_call(callAddress, destAddress, trampAddress)) + BIF_ERROR(BIF_P, BADARG); + BIF_RET(NIL); +} diff --git a/erts/emulator/hipe/hipe_bif0.h b/erts/emulator/hipe/hipe_bif0.h new file mode 100644 index 0000000000..ed27d5616a --- /dev/null +++ b/erts/emulator/hipe/hipe_bif0.h @@ -0,0 +1,53 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_bif0.h + * + * Compiler and linker support. 
+ */ +#ifndef HIPE_BIF0_H +#define HIPE_BIF0_H + +extern Uint *hipe_bifs_find_pc_from_mfa(Eterm mfa); + +/* shared with ggc.c -- NOT an official API */ +extern Eterm *hipe_constants_start; +extern Eterm *hipe_constants_next; + +extern void hipe_mfa_info_table_init(void); +extern void *hipe_get_remote_na(Eterm m, Eterm f, unsigned int a); +extern Eterm hipe_find_na_or_make_stub(Process*, Eterm, Eterm, Eterm); +extern int hipe_find_mfa_from_ra(const void *ra, Eterm *m, Eterm *f, unsigned int *a); +#if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__arm__) +extern void *hipe_mfa_get_trampoline(Eterm m, Eterm f, unsigned int a); +extern void hipe_mfa_set_trampoline(Eterm m, Eterm f, unsigned int a, void *trampoline); +#endif +#if defined(__arm__) +extern void *hipe_primop_get_trampoline(Eterm name); +extern void hipe_primop_set_trampoline(Eterm name, void *trampoline); +#endif + +/* needed in beam_load.c */ +void hipe_mfa_save_orig_beam_op(Eterm m, Eterm f, unsigned int a, Eterm *pc); + +/* these are also needed in hipe_amd64.c */ +extern void *term_to_address(Eterm); +extern int term_to_Sint32(Eterm, Sint *); + +#endif /* HIPE_BIF0_H */ diff --git a/erts/emulator/hipe/hipe_bif0.tab b/erts/emulator/hipe/hipe_bif0.tab new file mode 100644 index 0000000000..46c0a3d67d --- /dev/null +++ b/erts/emulator/hipe/hipe_bif0.tab @@ -0,0 +1,142 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +# $Id$ +# HiPE level 0 bifs: compiler and linker support +# +# bif hipe_bifs:name/arity + +#bif hipe_bifs:read_u8/1 +#bif hipe_bifs:read_u32/1 +bif hipe_bifs:write_u8/2 +#bif hipe_bifs:write_s32/2 +bif hipe_bifs:write_u32/2 +# bif hipe_bifs:write_s64/2 +# bif hipe_bifs:write_u64/2 + +bif hipe_bifs:bytearray/2 +bif hipe_bifs:bytearray_sub/2 +bif hipe_bifs:bytearray_update/3 +bif hipe_bifs:bitarray/2 +bif hipe_bifs:bitarray_sub/2 +bif hipe_bifs:bitarray_update/3 +bif hipe_bifs:array/2 +bif hipe_bifs:array_length/1 +bif hipe_bifs:array_sub/2 +bif hipe_bifs:array_update/3 +bif hipe_bifs:ref/1 +bif hipe_bifs:ref_get/1 +bif hipe_bifs:ref_set/2 + +bif hipe_bifs:enter_code/2 +bif hipe_bifs:alloc_data/2 +bif hipe_bifs:constants_size/0 +bif hipe_bifs:merge_term/1 + +bif hipe_bifs:fun_to_address/1 +#bif hipe_bifs:get_emu_address/1 +bif hipe_bifs:set_native_address/3 +#bif hipe_bifs:address_to_fun/1 + +bif hipe_bifs:set_funinfo_native_address/3 +bif hipe_bifs:invalidate_funinfo_native_addresses/1 + +bif hipe_bifs:update_code_size/3 +bif hipe_bifs:code_size/1 + +bif hipe_bifs:enter_sdesc/1 + +bif hipe_bifs:bif_address/3 +bif hipe_bifs:primop_address/1 +#bif hipe_bifs:gbif_address/2 + +bif hipe_bifs:atom_to_word/1 +bif hipe_bifs:term_to_word/1 + +#bif hipe_bifs:make_fun/3 +bif hipe_bifs:make_fe/3 + +#bif hipe_bifs:make_native_stub/2 +bif hipe_bifs:find_na_or_make_stub/2 + +bif hipe_bifs:check_crc/1 +bif hipe_bifs:system_crc/1 +bif hipe_bifs:get_rts_param/1 + +#bif hipe_bifs:tuple_to_float/1 + +bif hipe_bifs:patch_insn/3 +bif hipe_bifs:patch_call/3 + +bif hipe_bifs:add_ref/2 +bif hipe_bifs:mark_referred_from/1 +bif hipe_bifs:remove_refs_from/1 +bif hipe_bifs:redirect_referred_from/1 + +# atoms used by add_ref/2 +atom call +atom load_mfa +atom local +atom remote + +# atoms used by hipe_bifs:patch_insn/3 +atom atom +atom c_const +atom call +atom closure +atom 
constant +atom load_mfa +atom x86_abs_pcrel + +# atom used by hipe_patch_address() +atom load_fe + +atom suspend_msg +atom suspend_msg_timeout +atom suspend_0 +atom gc_1 +atom hipe_apply +atom rethrow +atom find_na_or_make_stub +atom nonclosure_address +atom atomic_inc +atom clear_timeout +atom check_get_msg +atom select_msg +atom set_timeout +atom cmp_2 +atom op_exact_eqeq_2 +atom conv_big_to_float +atom fclearerror_error +atom bs_put_big_integer +atom bs_put_small_float +atom bs_put_bits +atom bs_allocate +atom bs_get_integer_2 +atom bs_get_float_2 +atom bs_get_binary_2 +atom bs_reallocate +atom bs_utf8_size +atom bs_put_utf8 +atom bs_get_utf8 +atom bs_utf16_size +atom bs_put_utf16be +atom bs_put_utf16le +atom bs_get_utf16 +atom bs_validate_unicode +atom bs_validate_unicode_retract diff --git a/erts/emulator/hipe/hipe_bif1.c b/erts/emulator/hipe/hipe_bif1.c new file mode 100644 index 0000000000..5188950e17 --- /dev/null +++ b/erts/emulator/hipe/hipe_bif1.c @@ -0,0 +1,937 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_bif1.c + * + * Performance analysis support. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "sys.h" +#include "global.h" +#include "bif.h" +#include "big.h" +#include "error.h" +#include "beam_load.h" +#include "hipe_bif0.h" +#include "hipe_bif1.h" + +#define BeamOpCode(Op) ((Uint)BeamOp(Op)) + +BIF_RETTYPE hipe_bifs_call_count_on_1(BIF_ALIST_1) +{ + Eterm *pc; + struct hipe_call_count *hcc; + + pc = hipe_bifs_find_pc_from_mfa(BIF_ARG_1); + if (!pc) + BIF_ERROR(BIF_P, BADARG); + ASSERT(pc[-5] == BeamOpCode(op_i_func_info_IaaI)); + if (pc[0] == BeamOpCode(op_hipe_trap_call)) + BIF_ERROR(BIF_P, BADARG); + if (pc[0] == BeamOpCode(op_hipe_call_count)) + BIF_RET(NIL); + hcc = erts_alloc(ERTS_ALC_T_HIPE, sizeof(*hcc)); + hcc->count = 0; + hcc->opcode = pc[0]; + pc[-4] = (Eterm)hcc; + pc[0] = BeamOpCode(op_hipe_call_count); + BIF_RET(am_true); +} + +BIF_RETTYPE hipe_bifs_call_count_off_1(BIF_ALIST_1) +{ + Eterm *pc; + struct hipe_call_count *hcc; + unsigned count; + + pc = hipe_bifs_find_pc_from_mfa(BIF_ARG_1); + if (!pc) + BIF_ERROR(BIF_P, BADARG); + ASSERT(pc[-5] == BeamOpCode(op_i_func_info_IaaI)); + if (pc[0] != BeamOpCode(op_hipe_call_count)) + BIF_RET(am_false); + hcc = (struct hipe_call_count*)pc[-4]; + count = hcc->count; + pc[0] = hcc->opcode; + pc[-4] = (Eterm)NULL; + erts_free(ERTS_ALC_T_HIPE, hcc); + BIF_RET(make_small(count)); +} + +BIF_RETTYPE hipe_bifs_call_count_get_1(BIF_ALIST_1) +{ + Eterm *pc; + struct hipe_call_count *hcc; + + pc = hipe_bifs_find_pc_from_mfa(BIF_ARG_1); + if (!pc) + BIF_ERROR(BIF_P, BADARG); + ASSERT(pc[-5] == BeamOpCode(op_i_func_info_IaaI)); + if (pc[0] != BeamOpCode(op_hipe_call_count)) + BIF_RET(am_false); + hcc = (struct hipe_call_count*)pc[-4]; + BIF_RET(make_small(hcc->count)); +} + +BIF_RETTYPE hipe_bifs_call_count_clear_1(BIF_ALIST_1) +{ + Eterm *pc; + struct hipe_call_count *hcc; + unsigned count; + + pc = hipe_bifs_find_pc_from_mfa(BIF_ARG_1); + if (!pc) + BIF_ERROR(BIF_P, BADARG); + ASSERT(pc[-5] == BeamOpCode(op_i_func_info_IaaI)); + if (pc[0] != 
BeamOpCode(op_hipe_call_count)) + BIF_RET(am_false); + hcc = (struct hipe_call_count*)pc[-4]; + count = hcc->count; + hcc->count = 0; + BIF_RET(make_small(count)); +} + +unsigned int hipe_trap_count; + +BIF_RETTYPE hipe_bifs_trap_count_get_0(BIF_ALIST_0) +{ + BIF_RET(make_small(hipe_trap_count)); +} + +BIF_RETTYPE hipe_bifs_trap_count_clear_0(BIF_ALIST_0) +{ + unsigned int count = hipe_trap_count; + hipe_trap_count = 0; + BIF_RET(make_small(count)); +} + +/***************************************************************************** + * BIFs for benchmarking. These only do useful things if + * __BENCHMARK__ is defined in beam/benchmark.h. For documentation + * about how to add new counters or maintain the existing counters, + * see benchmark.h. + * + * If benchmarking is not enabled all BIFs will return false. If the + * required benchmark feature is not enabled, the counter will remain + * zero. + * + * process_info/0 -> { Number of live processes, + * Processes spawned in total } + * + * Live processes are increased when a new process is created, and + * decreased when a process dies. Processes spawned is increased + * when a process is created. + * + * + * process_info_clear/0 -> true + * + * Will reset the processes spawned-counters to zero. If this is + * done at some improper time, live processes may become a negative + * value. This is not a problem in itself, just as long as you know + * about it. + * + * + * message_info/0 -> { Messages sent, + * Messages copied, + * Ego messages (sender = receiver), + * Words sent, + * Words copied, + * Words preallocated } + * + * Counting the words sent in a shared heap system will affect + * runtime performance since it means that we have to calculate the + * size of the mesage. With private heaps, this is done anyway and + * will not affect performance. + * + * + * message_info_clear/0 -> true + * + * Reset the message counters to zero. 
+ * + * + * message_sizes/0 -> true + * + * Displays a text-mode bar diagram with message sizes. There are no + * guarantees that this is printed in a way the Erlang system is + * supposed to print things. + * + * + * gc_info/0 -> { Minor collections, + * Major collections, + * Used heap, + * Allocated heap, + * Max used heap, + * Max allocated heap } + * + * Information about private heap garbage collections. Number of + * minor and major collections, how much heap is used and allocated + * and how much heap has been in use and allocated at most since the + * counters were reset. + * + * + * shared_gc_info/0 -> { Minor collections of the shared heap, + * Major collections of the shared heap, + * Used shared heap, + * Allocated shared heap, + * Max used shared heap, + * Max allocated shared heap } + * + * The same as above, but for the shared heap / message area. Note + * that in a shared heap system the max used heap and max allocated + * heap are mostly the same, since the heap is always filled before + * a garbage collection, and most garbage collections do not enlarge + * the heap. The private heap numbers are much more interesting. + * + * + * incremental_gc_info/0 -> { Complete minor GC cycles, + * Complete major GC cycles, + * Minor GC stages, + * Major GC stages } + * + * + * gc_info_clear/0 -> true + * + * Reset counters for both private and shared garbage collection. + * + * + * BM Timers + * --------- + * + * All timers return tuples of the kind: { Minutes, Seconds, Milliseconds } + * except for the max times in garbage collection where times are normally + * small. The tuple is therefore: { Seconds, Milliseconds, Microseconds } + * + * system_timer/0 -> Mutator time + * + * This timer is not a real-time clock, it only runs when a process + * is scheduled to run. You cannot find out the actual time a + * program has taken to run using this timer. + * + * + * system_timer_clear/0 -> true + * + * Reset system timer to zero.
+ * + * + * send_timer/0 -> { Send time, + * Copy time, + * Size time } + * + * Time spent in sending messages. The copy time and size time are + * only active if the copying is needed in send. Copying of data + * into ETS-tables etc is not timed with this timer. + * + * + * send_timer_clear/0 -> true + * + * Reset send timers to zero. + * + * + * gc_timer/0 -> { Time in minor collection, + * Time in major collection, + * Max time in minor collection (µs), + * Max time in major collection (µs) } + * + * Total time spent in garbage collection of the private heaps. The + * max times are for one separate collection. + * + * + * shared_gc_timer/0 -> { Time in minor collection, + * Time in major collection, + * Max time in minor collection (µs), + * Max time in major collection (µs) } + * + * Total time spent in garbage collection of the shared heap / + * message area. The max times are for one separate collection. + * + * + * gc_timer_clear/0 -> true + * + * Reset private and shared garbage collection timers to zero. Note + * that the max times are also reset. + * + * + * misc_timer/0 -> { Misc 0, Misc 1, Misc 2 } + * + * Timers for debug purposes. In a normal system, these timers are + * never used. Add these timers at places where you want to time + * something not covered here. Use BM_SWAP_TIMER(from,to) to start + * one of the misc timers. + * + * ... code timed by the system timer ... + * BM_SWAP_TIMER(system,misc1); + * ... code we want to time ... + * BM_SWAP_TIMER(misc1,system); + * ... back on system time ... + * + * + * misc_timer_clear/0 -> true + * + * Reset misc timers to zero.
+ */ + +BIF_RETTYPE hipe_bifs_process_info_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ +#ifndef BM_COUNTERS + Uint processes_busy = 0; + Uint processes_spawned = 0; +#endif + Eterm *hp; + + hp = HAlloc(BIF_P, 3); + BIF_RET(TUPLE2(hp, + make_small(processes_busy), + make_small(processes_spawned))); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_process_info_clear_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ +#ifdef BM_COUNTERS + processes_spawned = 0; +#endif + BIF_RET(am_true); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_message_info_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ + Eterm *hp; +#ifndef BM_COUNTERS + unsigned long messages_sent = 0; + unsigned long messages_copied = 0; + unsigned long messages_ego = 0; +#endif +#ifndef BM_MESSAGE_SIZES + unsigned long words_sent = 0; + unsigned long words_copied = 0; + unsigned long words_prealloc = 0; +#endif + + hp = HAlloc(BIF_P, 7); + BIF_RET(TUPLE6(hp, + make_small(messages_sent), + make_small(messages_copied), + make_small(messages_ego), + make_small(words_sent), + make_small(words_copied), + make_small(words_prealloc))); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_message_info_clear_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ +#ifdef BM_COUNTERS + messages_sent = 0; + messages_copied = 0; + messages_ego = 0; +#endif +#ifdef BM_MESSAGE_SIZES + words_sent = 0; + words_copied = 0; + words_prealloc = 0; + { + int i; + for (i = 0; i < 1000; i++) + message_sizes[i] = 0; + } +#endif + BIF_RET(am_true); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_message_sizes_0(BIF_ALIST_0) +{ +#ifdef BM_MESSAGE_SIZES + int i, j, max = 0; + int tmp[12] = {0,0,0,0,0,0,0,0,0,0,0,0}; + + for (i = 0; i < 65; i++) { + tmp[0] += message_sizes[i]; + if (tmp[0] > max) + max = tmp[0]; + } + for (i = 65; i < 999; i++) { + tmp[i / 100 + 1] += message_sizes[i]; + if (tmp[i / 100 + 1] > max) + max = tmp[i / 100 + 1]; + } + tmp[11] = message_sizes[999]; + if (tmp[11] > max) + max = tmp[11]; 
+ for (i = -1; i < 11; i++) { + int num = (tmp[i + 1] * 50) / max; + if (i == -1) + printf("\n\r 0 - 64: (%6d) |", tmp[0]); + else if (i == 0) + printf("\n\r 65 - 99: (%6d) |", tmp[1]); + else if (i == 10) + printf("\n\r >= 1000: (%6d) |", tmp[11]); + else + printf("\n\r%3d - %3d: (%6d) |", i * 100, i * 100 + 99, + tmp[i + 1]); + + for (j = 0; j < num; j++) + printf("."); + } + printf("\n\r"); + + BIF_RET(am_true); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_gc_info_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ +#ifndef BM_COUNTERS + Uint minor_gc = 0; + Uint major_gc = 0; +#endif +#ifndef BM_HEAP_SIZES + Uint max_used_heap = 0; + Uint max_allocated_heap = 0; +#endif + Eterm *hp; + Uint used_heap = (BIF_P->htop - BIF_P->heap) + + (OLD_HTOP(BIF_P) - OLD_HEAP(BIF_P)) + + MBUF_SIZE(BIF_P); + + Uint alloc_heap = (BIF_P->hend - BIF_P->heap) + + (OLD_HEND(BIF_P) - OLD_HEAP(BIF_P)) + + MBUF_SIZE(BIF_P); + + hp = HAlloc(BIF_P, 7); + BIF_RET(TUPLE6(hp, + make_small((Uint)minor_gc), + make_small((Uint)major_gc), + make_small((Uint)used_heap), + make_small((Uint)alloc_heap), + make_small(max_used_heap), + make_small(max_allocated_heap))); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_shared_gc_info_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ +#if !(defined(BM_COUNTERS) && defined(HYBRID)) + Uint minor_global_gc = 0; + Uint major_global_gc = 0; +#endif +#ifndef BM_HEAP_SIZES + Uint max_used_global_heap = 0; + Uint max_allocated_global_heap = 0; +#endif + Eterm *hp; + +#if defined(HYBRID) + Uint tmp_used_heap = (Uint)((BIF_P->htop - BIF_P->heap) + + (OLD_HTOP(BIF_P) - OLD_HEAP(BIF_P)) + + MBUF_SIZE(BIF_P)); + Uint tmp_allocated_heap = (Uint)((BIF_P->hend - BIF_P->heap) + + (OLD_HEND(BIF_P) - OLD_HEAP(BIF_P)) + + MBUF_SIZE(BIF_P)); +#else + Uint tmp_used_heap = 0; + Uint tmp_allocated_heap = 0; +#endif + hp = HAlloc(BIF_P, 7); + BIF_RET(TUPLE6(hp, + make_small((uint)minor_global_gc), + make_small((uint)major_global_gc), + make_small(tmp_used_heap), + 
make_small(tmp_allocated_heap), + make_small(max_used_global_heap), + make_small(max_allocated_global_heap))); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_incremental_gc_info_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ +#if !(defined(BM_COUNTERS) && defined(INCREMENTAL)) + Uint minor_gc_cycles = 0; + Uint major_gc_cycles = 0; + Uint minor_gc_stages = 0; + Uint major_gc_stages = 0; +#endif + Eterm *hp; + + hp = HAlloc(BIF_P, 5); + BIF_RET(TUPLE4(hp, + make_small(minor_gc_cycles), + make_small(major_gc_cycles), + make_small(minor_gc_stages), + make_small(major_gc_stages))); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_gc_info_clear_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ + +#ifdef BM_COUNTERS + minor_gc = 0; + major_gc = 0; +#ifdef HYBRID + minor_global_gc = 0; + major_global_gc = 0; + gc_in_copy = 0; +#ifdef INCREMENTAL + minor_gc_cycles = 0; + major_gc_cycles = 0; + minor_gc_stages = 0; + major_gc_stages = 0; +#endif +#endif +#endif + +#ifdef BM_HEAP_SIZES + max_used_heap = 0; + max_allocated_heap = 0; + max_used_global_heap = 0; + max_allocated_global_heap = 0; +#endif + + BIF_RET(am_true); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_pause_times_0(BIF_ALIST_0) +{ +#ifdef BM_TIMERS + int i; + int total_time = 0, n = 0; + int left = 0, right = 0, mid = 0; + + printf("Pause times in minor collection:\r\n"); + for (i = 0; i < MAX_PAUSE_TIME; i++) { + if (pause_times[i] > 0) { + printf("%d: %ld\r\n", i, pause_times[i]); + total_time += pause_times[i] * i; + n += pause_times[i]; + + if (i > mid) + right += pause_times[i]; + + while (right > left) { + left += pause_times[mid++]; + right -= pause_times[mid]; + } + } + } + + printf("Number of collections: %d\r\n", n); + printf("Total collection time: %d\r\n", total_time); + if (n > 0) + printf("Mean pause time: %d\r\n", total_time / n); + + printf("Geometrical mean: %d\r\n", mid); + + total_time = 0; n = 0; + left = 0; right = 0; mid = 0; + printf("Pause times in major 
collection:\r\n"); + for (i = 0; i < MAX_PAUSE_TIME; i++) { + if (pause_times_old[i] > 0) { + printf("%d: %ld\r\n", i, pause_times_old[i]); + total_time += pause_times_old[i] * i; + n += pause_times_old[i]; + } + } + + printf("Number of collections: %d\r\n", n); + printf("Total collection time: %d\r\n", total_time); + if (n > 0) + printf("Mean pause time: %d\r\n", total_time / n); + + BIF_RET(am_true); +#else + BIF_RET(am_false); +#endif +} + +/* XXX: these macros have free variables */ +#ifdef BM_TIMERS +#if USE_PERFCTR +#define MAKE_TIME(_timer_) { \ + BM_TIMER_T tmp = _timer_##_time; \ + milli = (uint)(tmp - ((int)(tmp / 1000)) * 1000); \ + tmp /= 1000; \ + sec = (uint)(tmp - ((int)(tmp / 60)) * 60); \ + min = (uint)tmp / 60; } + +#define MAKE_MICRO_TIME(_timer_) { \ + BM_TIMER_T tmp = _timer_##_time * 1000; \ + micro = (uint)(tmp - ((int)(tmp / 1000)) * 1000); \ + tmp /= 1000; \ + milli = (uint)(tmp - ((int)(tmp / 1000)) * 1000); \ + sec = (uint)tmp / 1000; } + +#else +#define MAKE_TIME(_timer_) { \ + BM_TIMER_T tmp = _timer_##_time / 1000000; \ + milli = tmp % 1000; \ + tmp /= 1000; \ + sec = tmp % 60; \ + min = tmp / 60; } + +#define MAKE_MICRO_TIME(_timer_) { \ + BM_TIMER_T tmp = _timer_##_time / 1000; \ + micro = tmp % 1000; \ + tmp /= 1000; \ + milli = tmp % 1000; \ + sec = tmp / 1000; } + +#endif +#else +#define MAKE_TIME(_timer_) +#define MAKE_MICRO_TIME(_timer_) +#endif + +BIF_RETTYPE hipe_bifs_system_timer_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ + uint min = 0; + uint sec = 0; + uint milli = 0; + Eterm *hp; + + hp = HAlloc(BIF_P, 4); + MAKE_TIME(system); + BIF_RET(TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli))); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_system_timer_clear_0(BIF_ALIST_0) +{ +#ifdef BM_TIMERS + system_time = 0; + BIF_RET(am_true); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_send_timer_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ + uint min = 0; + uint sec = 0; + uint milli = 0; 
+ Eterm *hp; + Eterm sendtime, copytime, sizetime; + + hp = HAlloc(BIF_P, 4 * 4); + + MAKE_TIME(send); + sendtime = TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli)); + hp += 4; + + MAKE_TIME(copy); + copytime = TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli)); + hp += 4; + + MAKE_TIME(size); + sizetime = TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli)); + hp += 4; + BIF_RET(TUPLE3(hp, sendtime, copytime, sizetime)); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_send_timer_clear_0(BIF_ALIST_0) +{ +#ifdef BM_TIMERS + send_time = 0; + copy_time = 0; + size_time = 0; + BIF_RET(am_true); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_gc_timer_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ + Eterm *hp; + uint min = 0; + uint sec = 0; + uint milli = 0; + uint micro = 0; + Eterm minor, major, max_min, max_maj; + + hp = HAlloc(BIF_P, 4 * 4 + 5); + + MAKE_TIME(minor_gc); + minor = TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli)); + hp += 4; + + MAKE_TIME(major_gc); + major = TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli)); + hp += 4; + + MAKE_MICRO_TIME(max_minor); + max_min = TUPLE3(hp, + make_small(sec), + make_small(milli), + make_small(micro)); + hp += 4; + + MAKE_MICRO_TIME(max_major); + max_maj = TUPLE3(hp, + make_small(sec), + make_small(milli), + make_small(micro)); + hp += 4; + + BIF_RET(TUPLE4(hp, minor, major, max_min, max_maj)); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_shared_gc_timer_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ + Eterm *hp; + uint min = 0; + uint sec = 0; + uint milli = 0; + uint micro = 0; + Eterm minor, major, max_min, max_maj; + + hp = HAlloc(BIF_P, 4 * 4 + 5); + + MAKE_TIME(minor_global_gc); + minor = TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli)); + hp += 4; + + MAKE_TIME(major_global_gc); + major = TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli)); 
+ hp += 4; + + MAKE_MICRO_TIME(max_global_minor); + max_min = TUPLE3(hp, + make_small(sec), + make_small(milli), + make_small(micro)); + hp += 4; + + MAKE_MICRO_TIME(max_global_major); + max_maj = TUPLE3(hp, + make_small(sec), + make_small(milli), + make_small(micro)); + hp += 4; + + BIF_RET(TUPLE4(hp, minor, major, max_min, max_maj)); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_gc_timer_clear_0(BIF_ALIST_0) +{ +#ifdef BM_TIMERS + minor_gc_time = 0; + major_gc_time = 0; + max_minor_time = 0; + max_major_time = 0; + minor_global_gc_time = 0; + major_global_gc_time = 0; + max_global_minor_time = 0; + max_global_major_time = 0; + BIF_RET(am_true); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_misc_timer_0(BIF_ALIST_0) +{ +#ifdef __BENCHMARK__ + uint min = 0; + uint sec = 0; + uint milli = 0; + Eterm *hp; + Eterm misctime1, misctime2, misctime3; + + hp = HAlloc(BIF_P, 4 * 4); + + MAKE_TIME(misc0); + misctime1 = TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli)); + hp += 4; + + MAKE_TIME(misc1); + misctime2 = TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli)); + hp += 4; + + MAKE_TIME(misc2); + misctime3 = TUPLE3(hp, + make_small(min), + make_small(sec), + make_small(milli)); + hp += 4; + BIF_RET(TUPLE3(hp, misctime1, misctime2, misctime3)); +#else + BIF_RET(am_false); +#endif +} + +BIF_RETTYPE hipe_bifs_misc_timer_clear_0(BIF_ALIST_0) +{ +#ifdef BM_TIMERS + misc0_time = 0; + misc1_time = 0; + misc2_time = 0; + BIF_RET(am_true); +#else + BIF_RET(am_false); +#endif +} + +#undef MAKE_TIME +#undef MAKE_MICRO_TIME + +/* + * HiPE hrvtime(). + * These implementations are currently available: + * + On Linux with the perfctr extension we can use the process' + * virtualised time-stamp counter. To enable this mode you must + * pass `--with-perfctr=/path/to/perfctr' when configuring. + * + The fallback, which is the same as {X,_} = runtime(statistics). 
+ */ + +#if USE_PERFCTR + +#include "hipe_perfctr.h" +static int hrvtime_is_open; +#define hrvtime_is_started() hrvtime_is_open + +static void start_hrvtime(void) +{ + if (hipe_perfctr_hrvtime_open() >= 0) + hrvtime_is_open = 1; +} + +#define get_hrvtime() hipe_perfctr_hrvtime_get() +#define stop_hrvtime() hipe_perfctr_hrvtime_close() + +#else + +/* + * Fallback, if nothing better exists. + * This is the same as {X,_} = statistics(runtime), which uses + * times(2) on Unix systems. + */ + +#define hrvtime_is_started() 1 +#define start_hrvtime() do{}while(0) +#define stop_hrvtime() do{}while(0) + +static double get_hrvtime(void) +{ + unsigned long ms_user; + elapsed_time_both(&ms_user, NULL, NULL, NULL); + return (double)ms_user; +} + +#endif /* hrvtime support */ + +BIF_RETTYPE hipe_bifs_get_hrvtime_0(BIF_ALIST_0) +{ + Eterm *hp; + Eterm res; + FloatDef f; + + if (!hrvtime_is_started()) { + start_hrvtime(); + if (!hrvtime_is_started()) + BIF_RET(NIL); /* arity 0 BIFs may not fail */ + } + f.fd = get_hrvtime(); + hp = HAlloc(BIF_P, FLOAT_SIZE_OBJECT); + res = make_float(hp); + PUT_DOUBLE(f, hp); + BIF_RET(res); +} + +BIF_RETTYPE hipe_bifs_stop_hrvtime_0(BIF_ALIST_0) +{ + stop_hrvtime(); + BIF_RET(am_true); +} diff --git a/erts/emulator/hipe/hipe_bif1.h b/erts/emulator/hipe/hipe_bif1.h new file mode 100644 index 0000000000..c3b607565d --- /dev/null +++ b/erts/emulator/hipe/hipe_bif1.h @@ -0,0 +1,34 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_bif1.h + * + * Performance analysis support. + */ +#ifndef HIPE_BIF1_H +#define HIPE_BIF1_H + +struct hipe_call_count { + unsigned count; + Uint opcode; +}; + +extern unsigned int hipe_trap_count; + +#endif /* HIPE_BIF1_H */ diff --git a/erts/emulator/hipe/hipe_bif1.tab b/erts/emulator/hipe/hipe_bif1.tab new file mode 100644 index 0000000000..eb445d56f7 --- /dev/null +++ b/erts/emulator/hipe/hipe_bif1.tab @@ -0,0 +1,49 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. 
+# +# %CopyrightEnd% +# +# HiPE level 1 bifs: performance analysis support +# +# bif hipe_bifs:name/arity + +bif hipe_bifs:call_count_on/1 +bif hipe_bifs:call_count_off/1 +bif hipe_bifs:call_count_get/1 +bif hipe_bifs:call_count_clear/1 +bif hipe_bifs:trap_count_get/0 +bif hipe_bifs:trap_count_clear/0 +bif hipe_bifs:process_info/0 +bif hipe_bifs:process_info_clear/0 +bif hipe_bifs:message_info/0 +bif hipe_bifs:message_info_clear/0 +bif hipe_bifs:message_sizes/0 +bif hipe_bifs:gc_info/0 +bif hipe_bifs:shared_gc_info/0 +bif hipe_bifs:incremental_gc_info/0 +bif hipe_bifs:gc_info_clear/0 +bif hipe_bifs:pause_times/0 +bif hipe_bifs:system_timer/0 +bif hipe_bifs:system_timer_clear/0 +bif hipe_bifs:send_timer/0 +bif hipe_bifs:send_timer_clear/0 +bif hipe_bifs:gc_timer/0 +bif hipe_bifs:shared_gc_timer/0 +bif hipe_bifs:gc_timer_clear/0 +bif hipe_bifs:misc_timer/0 +bif hipe_bifs:misc_timer_clear/0 +bif hipe_bifs:get_hrvtime/0 +bif hipe_bifs:stop_hrvtime/0 diff --git a/erts/emulator/hipe/hipe_bif2.c b/erts/emulator/hipe/hipe_bif2.c new file mode 100644 index 0000000000..f992b758be --- /dev/null +++ b/erts/emulator/hipe/hipe_bif2.c @@ -0,0 +1,170 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_bif2.c + * + * Miscellaneous add-ons. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "sys.h" +#include "error.h" +#include "erl_vm.h" +#include "global.h" +#include "erl_process.h" +#include "bif.h" +#include "big.h" +#include "hipe_debug.h" +#include "hipe_mode_switch.h" +#include "hipe_bif0.h" /* hipe_constants_{start,next} */ +#include "hipe_arch.h" +#include "hipe_stack.h" + +BIF_RETTYPE hipe_bifs_show_estack_1(BIF_ALIST_1) +{ + Process *rp = erts_pid2proc(BIF_P, ERTS_PROC_LOCK_MAIN, + BIF_ARG_1, ERTS_PROC_LOCKS_ALL); + if (!rp) + BIF_ERROR(BIF_P, BADARG); + hipe_print_estack(rp); + erts_smp_proc_unlock(rp, ERTS_PROC_LOCKS_ALL); + BIF_RET(am_true); +} + +BIF_RETTYPE hipe_bifs_show_heap_1(BIF_ALIST_1) +{ + Process *rp = erts_pid2proc(BIF_P, ERTS_PROC_LOCK_MAIN, + BIF_ARG_1, ERTS_PROC_LOCKS_ALL); + if (!rp) + BIF_ERROR(BIF_P, BADARG); + hipe_print_heap(rp); + erts_smp_proc_unlock(rp, ERTS_PROC_LOCKS_ALL); + BIF_RET(am_true); +} + +BIF_RETTYPE hipe_bifs_show_nstack_1(BIF_ALIST_1) +{ + Process *rp = erts_pid2proc(BIF_P, ERTS_PROC_LOCK_MAIN, + BIF_ARG_1, ERTS_PROC_LOCKS_ALL); + if (!rp) + BIF_ERROR(BIF_P, BADARG); + hipe_print_nstack(rp); + erts_smp_proc_unlock(rp, ERTS_PROC_LOCKS_ALL); + BIF_RET(am_true); +} + +BIF_RETTYPE hipe_bifs_nstack_used_size_0(BIF_ALIST_0) +{ + BIF_RET(make_small(hipe_nstack_used(BIF_P))); +} + +BIF_RETTYPE hipe_bifs_show_pcb_1(BIF_ALIST_1) +{ + Process *rp = erts_pid2proc(BIF_P, ERTS_PROC_LOCK_MAIN, + BIF_ARG_1, ERTS_PROC_LOCKS_ALL); + if (!rp) + BIF_ERROR(BIF_P, BADARG); + hipe_print_pcb(rp); + erts_smp_proc_unlock(rp, ERTS_PROC_LOCKS_ALL); + BIF_RET(am_true); +} + +BIF_RETTYPE hipe_bifs_show_term_1(BIF_ALIST_1) +{ + Eterm obj = BIF_ARG_1; + + printf("0x%0*lx\r\n", 2*(int)sizeof(long), obj); + do { + Eterm *objp; + int i, ary; + + if (is_list(obj)) { + objp = list_val(obj); + ary = 2; + } else if (is_boxed(obj)) { + Eterm header; + + objp = boxed_val(obj); + header = objp[0]; + if (is_thing(header)) + ary = thing_arityval(header); + else if 
(is_arity_value(header)) + ary = arityval(header); + else { + printf("bad header %#lx\r\n", header); + break; + } + ary += 1; + } else + break; + for (i = 0; i < ary; ++i) + printf("0x%0*lx: 0x%0*lx\r\n", + 2*(int)sizeof(long), (unsigned long)&objp[i], + 2*(int)sizeof(long), objp[i]); + } while (0); + erts_printf("%T", obj); + printf("\r\n"); + BIF_RET(am_true); +} + +BIF_RETTYPE hipe_bifs_show_literals_0(BIF_ALIST_0) +{ + Eterm *p; + + p = hipe_constants_start; + for (; p < hipe_constants_next; ++p) + printf("0x%0*lx: 0x%0*lx\r\n", + 2*(int)sizeof(long), (unsigned long)p, + 2*(int)sizeof(long), *p); + BIF_RET(am_true); +} + +BIF_RETTYPE hipe_bifs_in_native_0(BIF_ALIST_0) +{ + BIF_RET(am_false); +} + +BIF_RETTYPE hipe_bifs_modeswitch_debug_on_0(BIF_ALIST_0) +{ + hipe_modeswitch_debug = 1; + BIF_RET(am_true); +} + +BIF_RETTYPE hipe_bifs_modeswitch_debug_off_0(BIF_ALIST_0) +{ + hipe_modeswitch_debug = 0; + BIF_RET(am_true); +} + +/* BIFs for handling the message area */ + +BIF_RETTYPE hipe_bifs_show_message_area_0(BIF_ALIST_0) +{ +#ifdef HYBRID +#ifdef DEBUG + print_message_area(); +#else + printf("Only available in debug compiled emulator\r\n"); +#endif + BIF_RET(am_true); +#else + BIF_RET(am_false); +#endif +} diff --git a/erts/emulator/hipe/hipe_bif2.tab b/erts/emulator/hipe/hipe_bif2.tab new file mode 100644 index 0000000000..d8d627e370 --- /dev/null +++ b/erts/emulator/hipe/hipe_bif2.tab @@ -0,0 +1,33 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +# HiPE level 2 bifs: miscellaneous add-ons +# +# bif hipe_bifs:name/arity + +bif hipe_bifs:show_estack/1 +bif hipe_bifs:show_heap/1 +bif hipe_bifs:show_nstack/1 +bif hipe_bifs:nstack_used_size/0 +bif hipe_bifs:show_pcb/1 +bif hipe_bifs:show_term/1 +bif hipe_bifs:show_literals/0 +bif hipe_bifs:in_native/0 +bif hipe_bifs:modeswitch_debug_on/0 +bif hipe_bifs:modeswitch_debug_off/0 +bif hipe_bifs:show_message_area/0 diff --git a/erts/emulator/hipe/hipe_bif_list.m4 b/erts/emulator/hipe/hipe_bif_list.m4 new file mode 100644 index 0000000000..c92d94ed9d --- /dev/null +++ b/erts/emulator/hipe/hipe_bif_list.m4 @@ -0,0 +1,280 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * + * List all non architecture-specific BIFs and primops, and + * classify each as belonging to one of the classes below. + * This list is included in hipe_${ARCH}_bifs.m4, which is + * responsible for translating these classifications to the + * best possible native code wrappers. + * + * XXX: We should have a more detailed BIF classification + * with a number of orthogonal properties (e.g., UPDATES_HP, + * NEEDS_NSP, CAN_FAIL, CAN_GC, etc), from which we should + * generate appropriate interfaces. 
+ * + * The classification is expressed in terms of the resources + * and BIF failure modes described below. + * + * Resources: + * - NSP: native stack pointer + * NSP is read by GC BIFs and primops, and hipe_handle_exception(). + * NSP is updated at compiler-inserted calls to hipe_inc_nstack(). + * No other BIF or primop may access NSP. + * - NSP_LIMIT: native stack limit + * NSP_LIMIT is only updated at compiler-inserted calls to inc_stack. + * Everywhere else, the cached value equals the value stored in P. + * - NRA: native return address + * NRA is read by GC BIFs and primops, and hipe_handle_exception(). + * No BIF or primop may update NRA. + * - HP: heap pointer + * All BIFs can read and update HP. + * Primops with access to P that do not access HP are called "nocons". + * - HP_LIMIT: heap limit + * HP_LIMIT is only updated by GC BIFs and primops. + * Everywhere else, the cached value equals the value stored in P. + * - FCALLS: reduction counter + * All BIFs can read and update FCALLS (because BEAM abuses FCALLS + * to trigger GCs). XXX: can we avoid that overhead? + * All nocons primops do not access FCALLS. + * All other primops with access to P can read and update FCALLS. + * - P: pointer to the state record for the process + * + * BIF failure modes: + * - none: may not signal any exception + * The BIF wrapper needs no checks before returning. + * - standard: may signal any exception + * The BIF wrapper must check for an exception before returning. + * Zero-arity BIFs signal no exceptions, except in a small number + * of cases explicitly enumerated here. 
+ */ + +/**************************************************************** + * BIF CLASS DESCRIPTIONS * + ****************************************************************/ + +/* + * standard_bif_interface_1(nbif_name, cbif_name) + * standard_bif_interface_2(nbif_name, cbif_name) + * standard_bif_interface_3(nbif_name, cbif_name) + * + * A BIF with implicit P parameter, 1-3 ordinary parameters, + * which may fail. + * HP and FCALLS may be read and updated. + * HP_LIMIT, NSP, NSP_LIMIT, and NRA may not be accessed. + */ + +/* + * fail_bif_interface_0(nbif_name, cbif_name) + * + * A zero-arity BIF which may fail, otherwise + * identical to standard_bif_interface_N. + */ + +/* + * nofail_primop_interface_0(nbif_name, cbif_name) + * nofail_primop_interface_1(nbif_name, cbif_name) + * nofail_primop_interface_2(nbif_name, cbif_name) + * nofail_primop_interface_3(nbif_name, cbif_name) + * + * A primop or guard BIF with no failure mode, otherwise + * identical to standard_bif_interface_N. + */ + +/* + * gc_bif_interface_0(nbif_name, cbif_name) + * gc_bif_interface_1(nbif_name, cbif_name) + * gc_bif_interface_2(nbif_name, cbif_name) + * + * A BIF which may do a GC or walk the native stack. + * May read NSP, NSP_LIMIT, NRA, HP, HP_LIMIT, and FCALLS. + * May update HP, HP_LIMIT, and FCALLS. + * May not update NSP, NSP_LIMIT, or NRA. + * Otherwise identical to standard_bif_interface_N. + */ + +/* + * gc_nofail_primop_interface_1(nbif_name, cbif_name) + * + * A primop with implicit P parameter, 1 ordinary parameter, + * and no failure mode. + * May read NSP, NSP_LIMIT, NRA, HP, HP_LIMIT, and FCALLS. + * May update HP, HP_LIMIT, and FCALLS. + * May not update NSP, NSP_LIMIT, or NRA. 
+ */ + +/* + * nocons_nofail_primop_interface_0(nbif_name, cbif_name) + * nocons_nofail_primop_interface_1(nbif_name, cbif_name) + * nocons_nofail_primop_interface_2(nbif_name, cbif_name) + * nocons_nofail_primop_interface_3(nbif_name, cbif_name) + * nocons_nofail_primop_interface_5(nbif_name, cbif_name) + * + * A primop with implicit P parameter, 0-3 or 5 ordinary parameters, + * and no failure mode. + * HP, HP_LIMIT, FCALLS, NSP, NSP_LIMIT, and NRA may not be accessed. + */ + +/* + * noproc_primop_interface_0(nbif_name, cbif_name) + * noproc_primop_interface_1(nbif_name, cbif_name) + * noproc_primop_interface_2(nbif_name, cbif_name) + * noproc_primop_interface_3(nbif_name, cbif_name) + * noproc_primop_interface_5(nbif_name, cbif_name) + * + * A primop with no P parameter, 0-3 or 5 ordinary parameters, + * and no failure mode. + * HP, HP_LIMIT, FCALLS, NSP, NSP_LIMIT, and NRA may not be accessed. + */ + +/**************************************************************** + * BIF CLASSIFICATION * + ****************************************************************/ + +/* + * Zero-arity BIFs that can fail. + */ +fail_bif_interface_0(nbif_memory_0, memory_0) +fail_bif_interface_0(nbif_processes_0, processes_0) + +/* + * BIFs and primops that may do a GC (change heap limit and walk the native stack). + * XXX: erase/1 and put/2 cannot fail + */ +gc_bif_interface_2(nbif_check_process_code_2, hipe_check_process_code_2) +gc_bif_interface_1(nbif_erase_1, erase_1) +gc_bif_interface_0(nbif_garbage_collect_0, garbage_collect_0) +gc_bif_interface_1(nbif_garbage_collect_1, hipe_garbage_collect_1) +gc_nofail_primop_interface_1(nbif_gc_1, hipe_gc) +gc_bif_interface_2(nbif_put_2, put_2) + +/* + * Debug BIFs that need read access to the full state. + * hipe_bifs:nstack_used_size/0 only needs read access to NSP. + * They are classified as GC BIFs for simplicity. 
+ */ +gc_bif_interface_1(nbif_hipe_bifs_show_nstack_1, hipe_show_nstack_1) +gc_bif_interface_1(nbif_hipe_bifs_show_pcb_1, hipe_bifs_show_pcb_1) +gc_bif_interface_0(nbif_hipe_bifs_nstack_used_size_0, hipe_bifs_nstack_used_size_0) + +/* + * Arithmetic operators called indirectly by the HiPE compiler. + */ +standard_bif_interface_2(nbif_add_2, erts_mixed_plus) +standard_bif_interface_2(nbif_sub_2, erts_mixed_minus) +standard_bif_interface_2(nbif_mul_2, erts_mixed_times) +standard_bif_interface_2(nbif_div_2, erts_mixed_div) +standard_bif_interface_2(nbif_intdiv_2, intdiv_2) +standard_bif_interface_2(nbif_rem_2, rem_2) +standard_bif_interface_2(nbif_bsl_2, bsl_2) +standard_bif_interface_2(nbif_bsr_2, bsr_2) +standard_bif_interface_2(nbif_band_2, band_2) +standard_bif_interface_2(nbif_bor_2, bor_2) +standard_bif_interface_2(nbif_bxor_2, bxor_2) +standard_bif_interface_1(nbif_bnot_1, bnot_1) + +/* + * Miscellaneous primops. + */ +standard_bif_interface_1(nbif_set_timeout, hipe_set_timeout) +standard_bif_interface_1(nbif_conv_big_to_float, hipe_conv_big_to_float) +standard_bif_interface_2(nbif_rethrow, hipe_rethrow) +standard_bif_interface_3(nbif_find_na_or_make_stub, hipe_find_na_or_make_stub) +standard_bif_interface_2(nbif_nonclosure_address, hipe_nonclosure_address) +nocons_nofail_primop_interface_0(nbif_fclearerror_error, hipe_fclearerror_error) + +/* + * Mbox primops with implicit P parameter. + */ +nocons_nofail_primop_interface_0(nbif_select_msg, hipe_select_msg) + +/* + * Primops without any P parameter. + * These cannot CONS or gc. + */ +noproc_primop_interface_2(nbif_cmp_2, cmp) +noproc_primop_interface_2(nbif_eq_2, eq) + +/* + * Bit-syntax primops with implicit P parameter. 
+ * XXX: all of the _2 versions cons on the ordinary heap + * XXX: all of them can cons and thus update FCALLS + */ +nofail_primop_interface_3(nbif_bs_get_integer_2, erts_bs_get_integer_2) +nofail_primop_interface_3(nbif_bs_get_binary_2, erts_bs_get_binary_2) +nofail_primop_interface_3(nbif_bs_get_float_2, erts_bs_get_float_2) +standard_bif_interface_3(nbif_bs_put_utf8, hipe_bs_put_utf8) +standard_bif_interface_3(nbif_bs_put_utf16be, hipe_bs_put_utf16be) +standard_bif_interface_3(nbif_bs_put_utf16le, hipe_bs_put_utf16le) +standard_bif_interface_1(nbif_bs_validate_unicode, hipe_bs_validate_unicode) + +/* + * Bit-syntax primops without any P parameter. + * These cannot CONS or gc. + */ +noproc_primop_interface_1(nbif_bs_allocate, hipe_bs_allocate) +noproc_primop_interface_2(nbif_bs_reallocate, hipe_bs_reallocate) +noproc_primop_interface_1(nbif_bs_utf8_size, hipe_bs_utf8_size) +noproc_primop_interface_1(nbif_bs_get_utf8, erts_bs_get_utf8) +noproc_primop_interface_1(nbif_bs_utf16_size, hipe_bs_utf16_size) +noproc_primop_interface_2(nbif_bs_get_utf16, erts_bs_get_utf16) +noproc_primop_interface_2(nbif_bs_validate_unicode_retract, hipe_bs_validate_unicode_retract) + +/* + * Bit-syntax primops. The ERTS_SMP runtime system requires P, + * hence the use of nocons_nofail_primop_interface_N(). + * When ERTS_SMP is disabled, noproc_primop_interface_N() + * should be used instead. 
+ */ +nocons_nofail_primop_interface_5(nbif_bs_put_small_float, hipe_bs_put_small_float) +noproc_primop_interface_5(nbif_bs_put_bits, hipe_bs_put_bits) +ifelse(ERTS_SMP,1,` +nocons_nofail_primop_interface_5(nbif_bs_put_big_integer, hipe_bs_put_big_integer) +',` +noproc_primop_interface_5(nbif_bs_put_big_integer, hipe_bs_put_big_integer) +')dnl + +gc_bif_interface_0(nbif_check_get_msg, hipe_check_get_msg) + +/* + * SMP-specific stuff + */ +ifelse(ERTS_SMP,1,` +nocons_nofail_primop_interface_0(nbif_clear_timeout, hipe_clear_timeout) +noproc_primop_interface_1(nbif_atomic_inc, hipe_atomic_inc) +',)dnl + +/* + * Implement standard_bif_interface_0 as nofail_primop_interface_0. + */ +define(standard_bif_interface_0,`nofail_primop_interface_0($1, $2)') + +/* + * Standard BIFs. + * BIF_LIST(ModuleAtom,FunctionAtom,Arity,CFun,Index) + */ +define(BIF_LIST,`standard_bif_interface_$3(nbif_$4, $4)') +include(TARGET/`erl_bif_list.h') + +/* + * Guard BIFs. + * GBIF_LIST(FunctionAtom,Arity,CFun) + */ +define(GBIF_LIST,`nofail_primop_interface_$2(gbif_$3, $3)') +include(`hipe/hipe_gbif_list.h') diff --git a/erts/emulator/hipe/hipe_debug.c b/erts/emulator/hipe/hipe_debug.c new file mode 100644 index 0000000000..548998b7b7 --- /dev/null +++ b/erts/emulator/hipe/hipe_debug.c @@ -0,0 +1,242 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ *
+ * %CopyrightEnd%
+ */
+/* $Id$
+ * hipe_debug.c
+ *
+ * TODO:
+ * - detect mode-switch native return addresses (ARCH-specific)
+ * - map user-code native return addresses to symbolic names
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include <stddef.h> /* offsetof() */
+#include <stdio.h>
+#include "sys.h"
+#include "erl_vm.h"
+#include "global.h"
+#include "erl_process.h"
+#include "beam_catches.h"
+#include "beam_load.h"
+#include "hipe_mode_switch.h"
+#include "hipe_debug.h"
+/* Separator rows for the dumps: 2*sizeof(long)+4 fill characters; the last element stays zero, so each array is a NUL-terminated string. */
+static const char dashes[2*sizeof(long)+5] = {
+    [0 ... 2*sizeof(long)+3] = '-'
+};
+/* Same layout as dashes, filled with '.'. */
+static const char dots[2*sizeof(long)+5] = {
+    [0 ... 2*sizeof(long)+3] = '.'
+};
+/* Same layout as dashes, filled with '*'. */
+static const char stars[2*sizeof(long)+5] = {
+    [0 ... 2*sizeof(long)+3] = '*'
+};
+/* Defined elsewhere; &beam_apply[1] is reported as "normal-process-exit" below. */
+extern Uint beam_apply[];
+/* Print a description of BEAM address pc: one of three well-known addresses, else Mod:Fun/Arity + offset if find_function_from_pc() finds it, else "?". */
+static void print_beam_pc(Uint *pc)
+{
+    if (pc == hipe_beam_pc_return) {
+        printf("return-to-native");
+    } else if (pc == hipe_beam_pc_throw) {
+        printf("throw-to-native");
+    } else if (pc == &beam_apply[1]) {
+        printf("normal-process-exit");
+    } else {
+        Eterm *mfa = find_function_from_pc(pc);
+        if (mfa)
+            erts_printf("%T:%T/%bpu + 0x%bpx",
+                        mfa[0], mfa[1], mfa[2], pc - &mfa[3]); /* offset is counted in words from &mfa[3] */
+        else
+            printf("?");
+    }
+}
+/* Print one table row for a catch slot: slot address, raw value, and the handler address from catch_pc(val) with its description. */
+static void catch_slot(Eterm *pos, Eterm val)
+{
+    Uint *pc = catch_pc(val);
+    printf(" | 0x%0*lx | 0x%0*lx | CATCH 0x%0*lx (BEAM ",
+           2*(int)sizeof(long), (unsigned long)pos,
+           2*(int)sizeof(long), (unsigned long)val,
+           2*(int)sizeof(long), (unsigned long)pc);
+    print_beam_pc(pc);
+    printf(")\r\n");
+}
+/* Print a dashed "BEAM ACTIVATION RECORD" separator, then a row for the continuation pointer stored at pos. */
+static void print_beam_cp(Eterm *pos, Eterm val)
+{
+    printf(" |%s|%s| BEAM ACTIVATION RECORD\r\n", dashes, dashes);
+    printf(" | 0x%0*lx | 0x%0*lx | BEAM PC ",
+           2*(int)sizeof(long), (unsigned long)pos,
+           2*(int)sizeof(long), (unsigned long)val);
+    print_beam_pc(cp_val(val));
+    printf("\r\n");
+}
+/* Print a catch slot framed by a dotted "BEAM CATCH FRAME" row above and a starred row below. */
+static void print_catch(Eterm *pos, Eterm val)
+{
+    printf(" |%s|%s| BEAM CATCH FRAME\r\n", dots, dots);
+    catch_slot(pos, val);
+    printf(" |%s|%s|\r\n", stars, stars);
+}
+
+static void print_stack(Eterm *sp, Eterm *end) +{ + printf(" | %*s | %*s |\r\n", + 2+2*(int)sizeof(long), "Address", + 2+2*(int)sizeof(long), "Contents"); + while (sp < end) { + Eterm val = sp[0]; + if (is_CP(val)) + print_beam_cp(sp, val); + else if (is_catch(val)) + print_catch(sp, val); + else { + printf(" | 0x%0*lx | 0x%0*lx | ", + 2*(int)sizeof(long), (unsigned long)sp, + 2*(int)sizeof(long), (unsigned long)val); + erts_printf("%.30T", val); + printf("\r\n"); + } + sp += 1; + } + printf(" |%s|%s|\r\n", dashes, dashes); +} + +void hipe_print_estack(Process *p) +{ + printf(" | BEAM STACK |\r\n"); + print_stack(p->stop, STACK_START(p)); +} + +static void print_heap(Eterm *pos, Eterm *end) +{ + printf("From: 0x%0*lx to 0x%0*lx\n\r", + 2*(int)sizeof(long), (unsigned long)pos, + 2*(int)sizeof(long), (unsigned long)end); + printf(" | H E A P |\r\n"); + printf(" | %*s | %*s |\r\n", + 2+2*(int)sizeof(long), "Address", + 2+2*(int)sizeof(long), "Contents"); + printf(" |%s|%s|\r\n", dashes, dashes); + while (pos < end) { + Eterm val = pos[0]; + printf(" | 0x%0*lx | 0x%0*lx | ", + 2*(int)sizeof(long), (unsigned long)pos, + 2*(int)sizeof(long), (unsigned long)val); + ++pos; + if (is_arity_value(val)) + printf("Arity(%lu)", arityval(val)); + else if (is_thing(val)) { + unsigned int ari = thing_arityval(val); + printf("Thing Arity(%u) Tag(%lu)", ari, thing_subtag(val)); + while (ari) { + printf("\r\n | 0x%0*lx | 0x%0*lx | THING", + 2*(int)sizeof(long), (unsigned long)pos, + 2*(int)sizeof(long), (unsigned long)*pos); + ++pos; + --ari; + } + } else + erts_printf("%.30T", val); + printf("\r\n"); + } + printf(" |%s|%s|\r\n", dashes, dashes); +} + +void hipe_print_heap(Process *p) +{ + print_heap(p->heap, p->htop); +} + +void hipe_print_pcb(Process *p) +{ + printf("P: 0x%0*lx\r\n", 2*(int)sizeof(long), (unsigned long)p); + printf("-----------------------------------------------\r\n"); + printf("Offset| Name | Value | *Value |\r\n"); +#define U(n,x) \ + printf(" % 4d | %s | 
0x%0*lx | |\r\n", (int)offsetof(Process,x), n, 2*(int)sizeof(long), (unsigned long)p->x) +#define P(n,x) \ + printf(" % 4d | %s | 0x%0*lx | 0x%0*lx |\r\n", (int)offsetof(Process,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2*(int)sizeof(long), p->x ? (unsigned long)*(p->x) : -1UL) + + U("htop ", htop); + U("hend ", hend); + U("heap ", heap); + U("heap_sz ", heap_sz); + U("stop ", stop); + U("gen_gcs ", gen_gcs); + U("max_gen_gcs", max_gen_gcs); + U("high_water ", high_water); + U("old_hend ", old_hend); + U("old_htop ", old_htop); + U("old_head ", old_heap); + U("min_heap_..", min_heap_size); + U("status ", status); + U("rstatus ", rstatus); + U("rcount ", rcount); + U("id ", id); + U("prio ", prio); + U("reds ", reds); + U("tracer_pr..", tracer_proc); + U("trace_fla..", trace_flags); + U("group_lea..", group_leader); + U("flags ", flags); + U("fvalue ", fvalue); + U("freason ", freason); + U("fcalls ", fcalls); + /*XXX: ErlTimer tm; */ + U("next ", next); + /*XXX: ErlOffHeap off_heap; */ + U("reg ", reg); + U("nlinks ", nlinks); + /*XXX: ErlMessageQueue msg; */ + U("mbuf ", mbuf); + U("mbuf_sz ", mbuf_sz); + U("dictionary ", dictionary); + U("seq..clock ", seq_trace_clock); + U("seq..astcnt", seq_trace_lastcnt); + U("seq..token ", seq_trace_token); + U("intial[0] ", initial[0]); + U("intial[1] ", initial[1]); + U("intial[2] ", initial[2]); + P("current ", current); + P("cp ", cp); + P("i ", i); + U("catches ", catches); + U("arity ", arity); + P("arg_reg ", arg_reg); + U("max_arg_reg", max_arg_reg); + U("def..reg[0]", def_arg_reg[0]); + U("def..reg[1]", def_arg_reg[1]); + U("def..reg[2]", def_arg_reg[2]); + U("def..reg[3]", def_arg_reg[3]); + U("def..reg[4]", def_arg_reg[4]); + U("def..reg[5]", def_arg_reg[5]); +#ifdef HIPE + U("nsp ", hipe.nsp); + U("nstack ", hipe.nstack); + U("nstend ", hipe.nstend); + U("ncallee ", hipe.ncallee); + hipe_arch_print_pcb(&p->hipe); +#endif /* HIPE */ +#undef U +#undef P + 
printf("-----------------------------------------------\r\n"); +} diff --git a/erts/emulator/hipe/hipe_debug.h b/erts/emulator/hipe/hipe_debug.h new file mode 100644 index 0000000000..3980bc8230 --- /dev/null +++ b/erts/emulator/hipe/hipe_debug.h @@ -0,0 +1,29 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_debug.h + */ +#ifndef HIPE_DEBUG_H +#define HIPE_DEBUG_H + +extern void hipe_print_estack(Process *p); +extern void hipe_print_heap(Process *p); +extern void hipe_print_pcb(Process *p); + +#endif /* HIPE_DEBUG_H */ diff --git a/erts/emulator/hipe/hipe_gbif_list.h b/erts/emulator/hipe/hipe_gbif_list.h new file mode 100644 index 0000000000..659f74b5e5 --- /dev/null +++ b/erts/emulator/hipe/hipe_gbif_list.h @@ -0,0 +1,23 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * GBIF_LIST(FunctionAtom,Arity,CFun) + * manually maintained for now -- expand when necessary + */ +GBIF_LIST(am_node,1,node_1) diff --git a/erts/emulator/hipe/hipe_gc.c b/erts/emulator/hipe/hipe_gc.c new file mode 100644 index 0000000000..e57e293547 --- /dev/null +++ b/erts/emulator/hipe/hipe_gc.c @@ -0,0 +1,556 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + * GC support procedures + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "global.h" + +#include "erl_gc.h" + +#include "hipe_stack.h" +#include "hipe_gc.h" +#include "hipe_bif0.h" /* for hipe_constants_{start,next} */ + +Eterm *fullsweep_nstack(Process *p, Eterm *n_htop) +{ + /* known nstack walk state */ + Eterm *nsp; + Eterm *nsp_end; + const struct sdesc *sdesc; + unsigned int sdesc_size; + unsigned long ra; + unsigned int i; + unsigned int mask; + /* arch-specific nstack walk state */ + struct nstack_walk_state walk_state; + + /* fullsweep-specific state */ + char *src, *oh; + Uint src_size, oh_size; + + if (!nstack_walk_init_check(p)) + return n_htop; + + nsp = nstack_walk_nsp_begin(p); + nsp_end = p->hipe.nstgraylim; + if (nsp_end) + nstack_walk_kill_trap(p, nsp_end); + nsp_end = nstack_walk_nsp_end(p); + + sdesc = nstack_walk_init_sdesc(p, &walk_state); + + src = (char*)HEAP_START(p); + src_size = (char*)HEAP_TOP(p) - src; + oh = (char*)OLD_HEAP(p); + oh_size = (char*)OLD_HTOP(p) - oh; + + for (;;) { + if (nstack_walk_nsp_reached_end(nsp, nsp_end)) { + if (nsp == nsp_end) { + if (nsp) { + /* see the HIGH_WATER update in fullsweep_heap() */ + p->hipe.nstblacklim = nsp; /* nsp == nsp_end */ + nstack_walk_update_trap(p, walk_state.sdesc0); + } + return n_htop; + } + fprintf(stderr, "%s: passed end of stack\r\n", __FUNCTION__); + break; + } + sdesc_size = nstack_walk_frame_size(sdesc); + i = 0; + mask = sdesc->livebits[0]; + for (;;) { + if (mask & 1) { + Eterm *nsp_i = nstack_walk_frame_index(nsp, i); + Eterm gval = *nsp_i; + if (is_boxed(gval)) { + Eterm *ptr = boxed_val(gval); + Eterm val = *ptr; + if (IS_MOVED(val)) { + ASSERT(is_boxed(val)); + *nsp_i = val; + } else if (in_area(ptr, src, src_size) || + in_area(ptr, oh, oh_size)) { + MOVE_BOXED(ptr, val, n_htop, nsp_i); + } + } else if (is_list(gval)) { + Eterm *ptr = list_val(gval); + Eterm val = *ptr; + if (is_non_value(val)) { + *nsp_i = ptr[1]; + } 
else if (in_area(ptr, src, src_size) || + in_area(ptr, oh, oh_size)) { + ASSERT(within(ptr, p)); + MOVE_CONS(ptr, val, n_htop, nsp_i); + } + } + } + if (++i >= sdesc_size) + break; + if (i & 31) + mask >>= 1; + else + mask = sdesc->livebits[i >> 5]; + } + ra = nstack_walk_frame_ra(nsp, sdesc); + sdesc = hipe_find_sdesc(ra); + nsp = nstack_walk_next_frame(nsp, sdesc_size); + } + abort(); +}

/*
 * Generational sweep of the live slots on process p's native stack.
 *
 * The walk starts at nstack_walk_nsp_begin(p).  If a gray limit is set
 * it stops at the black limit (after resetting the black limit when the
 * gray limit has passed it, and killing the stack trap); otherwise it
 * stops at nstack_walk_nsp_end(p).
 *
 * For every slot marked live in the frame's sdesc->livebits that holds
 * a boxed or list pointer:
 *   - an object that has already been moved only gets its slot updated
 *     (from the header word for boxed terms, from ptr[1] for conses);
 *   - an object inside the first mature_size bytes of the heap is moved
 *     with MOVE_BOXED/MOVE_CONS to old_htop;
 *   - an object inside the first heap_size bytes of the heap is moved
 *     to n_htop.
 *
 * ptr_old_htop / ptr_n_htop: in/out allocation tops; they are written
 * back only when the walk ends exactly at the end limit.
 *
 * If the walk runs past the end limit a message is printed to stderr
 * and abort() is called.
 */
void gensweep_nstack(Process *p, Eterm **ptr_old_htop, Eterm **ptr_n_htop)
{
    /* known nstack walk state */
    Eterm *nsp;
    Eterm *nsp_end;
    const struct sdesc *sdesc;
    unsigned int sdesc_size;
    unsigned long ra;
    unsigned int i;
    unsigned int mask;
    /* arch-specific nstack walk state */
    struct nstack_walk_state walk_state;

    /* gensweep-specific state */
    Eterm *old_htop, *n_htop;
    char *heap;
    Uint heap_size, mature_size;

    if (!nstack_walk_init_check(p))
        return;

    nsp = nstack_walk_nsp_begin(p);
    nsp_end = p->hipe.nstgraylim;
    if (nsp_end) {
        /* if gray limit passed black limit, reset black limit */
        if (nstack_walk_gray_passed_black(nsp_end, p->hipe.nstblacklim))
            p->hipe.nstblacklim = nsp_end;
        nstack_walk_kill_trap(p, nsp_end);
        nsp_end = p->hipe.nstblacklim;
    } else
        nsp_end = nstack_walk_nsp_end(p);

    sdesc = nstack_walk_init_sdesc(p, &walk_state);

    old_htop = *ptr_old_htop;
    n_htop = *ptr_n_htop;
    heap = (char*)HEAP_START(p);
    heap_size = (char*)HEAP_TOP(p) - heap;
    mature_size = (char*)HIGH_WATER(p) - heap;

    for (;;) {
        if (nstack_walk_nsp_reached_end(nsp, nsp_end)) {
            if (nsp == nsp_end) {
                /* normal termination: publish the new allocation tops */
                *ptr_old_htop = old_htop;
                *ptr_n_htop = n_htop;
                if (nsp) {
                    /* see the HIGH_WATER update in gen_gc() */
                    if (HEAP_START(p) != HIGH_WATER(p)) {
                        p->hipe.nstblacklim =
                            p->hipe.nstgraylim
                            ? p->hipe.nstgraylim
                            : nsp; /* nsp == nsp_end */
                    } else {
                        /* blacklim = graylim ? blacklim : end */
                        if (!p->hipe.nstgraylim)
                            p->hipe.nstblacklim = nsp; /* nsp == nsp_end */
                    }
                    nstack_walk_update_trap(p, walk_state.sdesc0);
                }
                return;
            }
            fprintf(stderr, "%s: passed end of stack\r\n", __FUNCTION__);
            break;
        }
        sdesc_size = nstack_walk_frame_size(sdesc);
        i = 0;
        mask = sdesc->livebits[0];
        /* one liveness bit per slot, 32 slots per livebits[] word */
        for (;;) {
            if (mask & 1) {
                Eterm *nsp_i = nstack_walk_frame_index(nsp, i);
                Eterm gval = *nsp_i;
                if (is_boxed(gval)) {
                    Eterm *ptr = boxed_val(gval);
                    Eterm val = *ptr;
                    if (IS_MOVED(val)) {
                        ASSERT(is_boxed(val));
                        *nsp_i = val;
                    } else if (in_area(ptr, heap, mature_size)) {
                        MOVE_BOXED(ptr, val, old_htop, nsp_i);
                    } else if (in_area(ptr, heap, heap_size)) {
                        ASSERT(within(ptr, p));
                        MOVE_BOXED(ptr, val, n_htop, nsp_i);
                    }
                } else if (is_list(gval)) {
                    Eterm *ptr = list_val(gval);
                    Eterm val = *ptr;
                    if (is_non_value(val)) {
                        *nsp_i = ptr[1];
                    } else if (in_area(ptr, heap, mature_size)) {
                        MOVE_CONS(ptr, val, old_htop, nsp_i);
                    } else if (in_area(ptr, heap, heap_size)) {
                        ASSERT(within(ptr, p));
                        MOVE_CONS(ptr, val, n_htop, nsp_i);
                    }
                }
            }
            if (++i >= sdesc_size)
                break;
            if (i & 31)
                mask >>= 1;
            else
                mask = sdesc->livebits[i >> 5];
        }
        ra = nstack_walk_frame_ra(nsp, sdesc);
        sdesc = hipe_find_sdesc(ra);
        nsp = nstack_walk_next_frame(nsp, sdesc_size);
    }
    abort();
}

#ifdef HYBRID

#ifdef INCREMENTAL
/*
 * INCREMENTAL variant: sweep the whole native stack of p, from
 * nstack_walk_nsp_begin(p) to nstack_walk_nsp_end(p).
 *
 * Every live slot is dispatched on its primary tag: list and boxed
 * terms are handed to COPYMARK_CONS / COPYMARK_BOXED together with
 * n_htop and n_hend; all other tags are ignored.
 *
 * Returns the (possibly advanced) n_htop.  Calls abort() if the walk
 * runs past the end of the stack.
 */
Eterm *ma_fullsweep_nstack(Process *p, Eterm *n_htop, Eterm *n_hend)
{
    /* known nstack walk state */
    Eterm *nsp;
    Eterm *nsp_end;
    const struct sdesc *sdesc;
    unsigned int sdesc_size;
    unsigned long ra;
    unsigned int i;
    unsigned int mask;
    /* arch-specific nstack walk state */
    struct nstack_walk_state walk_state;

    if (!nstack_walk_init_check(p))
        return n_htop;

    nsp = nstack_walk_nsp_begin(p);
    nsp_end = nstack_walk_nsp_end(p);

    sdesc = nstack_walk_init_sdesc(p, &walk_state);

    for (;;) {
        if (nstack_walk_nsp_reached_end(nsp, nsp_end)) {
            if (nsp == nsp_end)
                return n_htop;
            fprintf(stderr, "%s: passed end of stack\r\n", __FUNCTION__);
            break;
        }
        sdesc_size = nstack_walk_frame_size(sdesc);
        i = 0;
        mask = sdesc->livebits[0];
        for (;;) {
            if (mask & 1) {
                Eterm *nsp_i = nstack_walk_frame_index(nsp, i);
                Eterm val = *nsp_i;
                Eterm *obj_ptr = ptr_val(val);
                switch (primary_tag(val)) {
                case TAG_PRIMARY_LIST:
                    COPYMARK_CONS(obj_ptr, n_htop, nsp_i, n_hend);
                    break;
                case TAG_PRIMARY_BOXED:
                    COPYMARK_BOXED(obj_ptr, n_htop, nsp_i, n_hend);
                    break;
                default:
                    break;
                }
            }
            if (++i >= sdesc_size)
                break;
            if (i & 31)
                mask >>= 1;
            else
                mask = sdesc->livebits[i >> 5];
        }
        ra = nstack_walk_frame_ra(nsp, sdesc);
        /* a frame whose return address is the stack trap keeps its
           real return address in p->hipe.ngra */
        if (ra == (unsigned long)nbif_stack_trap_ra)
            ra = (unsigned long)p->hipe.ngra;
        sdesc = hipe_find_sdesc(ra);
        nsp = nstack_walk_next_frame(nsp, sdesc_size);
    }
    abort();
}

/*
 * INCREMENTAL variant: generational sweep of p's native stack against
 * the global heap.  Objects in [low_water, high_water) are moved to
 * old_htop, objects in [high_water, surface) to n_htop; already-moved
 * objects only have their stack slot updated.
 *
 * The updated tops are written back through ptr_old_htop / ptr_n_htop
 * when the walk ends exactly at the end of the stack; otherwise
 * abort() is called.
 */
void ma_gensweep_nstack(Process *p, Eterm **ptr_old_htop, Eterm **ptr_n_htop)
{
    /* known nstack walk state */
    Eterm *nsp;
    Eterm *nsp_end;
    const struct sdesc *sdesc;
    unsigned int sdesc_size;
    unsigned long ra;
    unsigned int i;
    unsigned int mask;
    /* arch-specific nstack walk state */
    struct nstack_walk_state walk_state;

    /* ma_gensweep-specific state */
    Eterm *low_water, *high_water, *surface;
    Eterm *n_htop;
    Eterm *old_htop;

    if (!nstack_walk_init_check(p))
        return;

    nsp = nstack_walk_nsp_begin(p);
    nsp_end = nstack_walk_nsp_end(p);

    low_water = global_heap;
    /*
     * NOTE(review): the assignment below is disabled in this INCREMENTAL
     * variant, so high_water is read uninitialized by the ptr_within()
     * tests further down.  Confirm the intended boundary before relying
     * on INCREMENTAL builds.
     *
     * high_water = global_high_water;
     */
    surface = global_htop;

    old_htop = *ptr_old_htop;
    n_htop = *ptr_n_htop;

    sdesc = nstack_walk_init_sdesc(p, &walk_state);

    for (;;) {
        if (nstack_walk_nsp_reached_end(nsp, nsp_end)) {
            if (nsp == nsp_end) {
                *ptr_old_htop = old_htop;
                *ptr_n_htop = n_htop;
                return;
            }
            fprintf(stderr, "%s: passed end of stack\r\n", __FUNCTION__);
            break;
        }
        sdesc_size = nstack_walk_frame_size(sdesc);
        i = 0;
        mask = sdesc->livebits[0];
        for (;;) {
            if (mask & 1) {
                Eterm *nsp_i = nstack_walk_frame_index(nsp, i);
                Eterm gval = *nsp_i;
                if (is_boxed(gval)) {
                    Eterm *ptr = boxed_val(gval);
                    Eterm val = *ptr;
                    if (MY_IS_MOVED(val)) {
                        *nsp_i = val;
                    } else if (ptr_within(ptr, low_water, high_water)) {
                        MOVE_BOXED(ptr, val, old_htop, nsp_i);
                    } else if (ptr_within(ptr, high_water, surface)) {
                        MOVE_BOXED(ptr, val, n_htop, nsp_i);
                    }
                } else if (is_list(gval)) {
                    Eterm *ptr = list_val(gval);
                    Eterm val = *ptr;
                    if (is_non_value(val)) {
                        *nsp_i = ptr[1];
                    } else if (ptr_within(ptr, low_water, high_water)) {
                        MOVE_CONS(ptr, val, old_htop, nsp_i);
                    } else if (ptr_within(ptr, high_water, surface)) {
                        MOVE_CONS(ptr, val, n_htop, nsp_i);
                    }
                }
            }
            if (++i >= sdesc_size)
                break;
            if (i & 31)
                mask >>= 1;
            else
                mask = sdesc->livebits[i >> 5];
        }
        ra = nstack_walk_frame_ra(nsp, sdesc);
        if (ra == (unsigned long)nbif_stack_trap_ra)
            ra = (unsigned long)p->hipe.ngra;
        sdesc = hipe_find_sdesc(ra);
        nsp = nstack_walk_next_frame(nsp, sdesc_size);
    }
    abort();
}

#else /* not INCREMENTAL */

/*
 * Full sweep of p's native stack against the global heap.
 *
 * Every live slot holding a boxed or list pointer is examined: an
 * already-moved object only has its slot updated; an object located in
 * either the global heap [gheap, ghtop) or the global old heap
 * [goheap, gohtop) is moved to n_htop.
 *
 * Returns the (possibly advanced) n_htop.  Calls abort() if the walk
 * runs past the end of the stack.
 */
Eterm *ma_fullsweep_nstack(Process *p, Eterm *n_htop)
{
    /* known nstack walk state */
    Eterm *nsp;
    Eterm *nsp_end;
    const struct sdesc *sdesc;
    unsigned int sdesc_size;
    unsigned long ra;
    unsigned int i;
    unsigned int mask;
    /* arch-specific nstack walk state */
    struct nstack_walk_state walk_state;

    /* ma_fullsweep-specific state */
    Eterm *gheap = global_heap;
    Eterm *ghtop = global_htop;
    Eterm *goheap = global_old_heap;
    Eterm *gohtop = global_old_htop;

    if (!nstack_walk_init_check(p))
        return n_htop;

    nsp = nstack_walk_nsp_begin(p);
    nsp_end = nstack_walk_nsp_end(p);

    sdesc = nstack_walk_init_sdesc(p, &walk_state);

    for (;;) {
        if (nstack_walk_nsp_reached_end(nsp, nsp_end)) {
            if (nsp == nsp_end)
                return n_htop;
            fprintf(stderr, "%s: passed end of stack\r\n", __FUNCTION__);
            break;
        }
        sdesc_size = nstack_walk_frame_size(sdesc);
        i = 0;
        mask = sdesc->livebits[0];
        for (;;) {
            if (mask & 1) {
                Eterm *nsp_i = nstack_walk_frame_index(nsp, i);
                Eterm gval = *nsp_i;
                if (is_boxed(gval)) {
                    Eterm *ptr = boxed_val(gval);
                    Eterm val = *ptr;
                    if (MY_IS_MOVED(val)) {
                        *nsp_i = val;
                    } else if (ptr_within(ptr, gheap, ghtop)) {
                        MOVE_BOXED(ptr, val, n_htop, nsp_i);
                    } else if (ptr_within(ptr, goheap, gohtop)) {
                        MOVE_BOXED(ptr, val, n_htop, nsp_i);
                    }
                } else if (is_list(gval)) {
                    Eterm *ptr = list_val(gval);
                    Eterm val = *ptr;
                    if (is_non_value(val)) {
                        *nsp_i = ptr[1];
                    } else if (ptr_within(ptr, gheap, ghtop)) {
                        MOVE_CONS(ptr, val, n_htop, nsp_i);
                    } else if (ptr_within(ptr, goheap, gohtop)) {
                        /* Mirrors the boxed case above.  This branch used
                           to repeat the [gheap, ghtop) test, which made it
                           unreachable and left conses in the global old
                           heap unmoved. */
                        MOVE_CONS(ptr, val, n_htop, nsp_i);
                    }
                }
            }
            if (++i >= sdesc_size)
                break;
            if (i & 31)
                mask >>= 1;
            else
                mask = sdesc->livebits[i >> 5];
        }
        ra = nstack_walk_frame_ra(nsp, sdesc);
        /* a frame whose return address is the stack trap keeps its
           real return address in p->hipe.ngra */
        if (ra == (unsigned long)nbif_stack_trap_ra)
            ra = (unsigned long)p->hipe.ngra;
        sdesc = hipe_find_sdesc(ra);
        nsp = nstack_walk_next_frame(nsp, sdesc_size);
    }
    abort();
}

/*
 * Generational sweep of p's native stack against the global heap.
 * Objects in [low_water, high_water) are moved to old_htop, objects in
 * [high_water, surface) to n_htop; already-moved objects only have
 * their stack slot updated.
 *
 * The updated tops are written back through ptr_old_htop / ptr_n_htop
 * when the walk ends exactly at the end of the stack; otherwise
 * abort() is called.
 */
void ma_gensweep_nstack(Process *p, Eterm **ptr_old_htop, Eterm **ptr_n_htop)
{
    /* known nstack walk state */
    Eterm *nsp;
    Eterm *nsp_end;
    const struct sdesc *sdesc;
    unsigned int sdesc_size;
    unsigned long ra;
    unsigned int i;
    unsigned int mask;
    /* arch-specific nstack walk state */
    struct nstack_walk_state walk_state;

    /* ma_gensweep-specific state */
    Eterm *low_water, *high_water, *surface;
    Eterm *n_htop;
    Eterm *old_htop;

    if (!nstack_walk_init_check(p))
        return;

    nsp = nstack_walk_nsp_begin(p);
    nsp_end = nstack_walk_nsp_end(p);

    low_water = global_heap;
    high_water = global_high_water;
    surface = global_htop;

    old_htop = *ptr_old_htop;
    n_htop = *ptr_n_htop;

    sdesc = nstack_walk_init_sdesc(p, &walk_state);

    for (;;) {
        if (nstack_walk_nsp_reached_end(nsp, nsp_end)) {
            if (nsp == nsp_end) {
                *ptr_old_htop = old_htop;
                *ptr_n_htop = n_htop;
                return;
            }
            fprintf(stderr, "%s: passed end of stack\r\n", __FUNCTION__);
            break;
        }
        sdesc_size = nstack_walk_frame_size(sdesc);
        i = 0;
        mask = sdesc->livebits[0];
        for (;;) {
            if (mask & 1) {
                Eterm *nsp_i = nstack_walk_frame_index(nsp, i);
                Eterm gval = *nsp_i;
                if (is_boxed(gval)) {
                    Eterm *ptr = boxed_val(gval);
                    Eterm val = *ptr;
                    if (MY_IS_MOVED(val)) {
                        *nsp_i = val;
                    } else if (ptr_within(ptr, low_water, high_water)) {
                        MOVE_BOXED(ptr, val, old_htop, nsp_i);
                    } else if (ptr_within(ptr, high_water, surface)) {
                        MOVE_BOXED(ptr, val, n_htop, nsp_i);
                    }
                } else if (is_list(gval)) {
                    Eterm *ptr = list_val(gval);
                    Eterm val = *ptr;
                    if (is_non_value(val)) {
                        *nsp_i = ptr[1];
                    } else if (ptr_within(ptr, low_water, high_water)) {
                        MOVE_CONS(ptr, val, old_htop, nsp_i);
                    } else if (ptr_within(ptr, high_water, surface)) {
                        MOVE_CONS(ptr, val, n_htop, nsp_i);
                    }
                }
            }
            if (++i >= sdesc_size)
                break;
            if (i & 31)
                mask >>= 1;
            else
                mask = sdesc->livebits[i >> 5];
        }
        ra = nstack_walk_frame_ra(nsp, sdesc);
        if (ra == (unsigned long)nbif_stack_trap_ra)
            ra = (unsigned long)p->hipe.ngra;
        sdesc = hipe_find_sdesc(ra);
        nsp = nstack_walk_next_frame(nsp, sdesc_size);
    }
    abort();
}
#endif /* INCREMENTAL */

#endif /* HYBRID */
diff --git a/erts/emulator/hipe/hipe_gc.h b/erts/emulator/hipe/hipe_gc.h new file mode 100644 index 0000000000..712d0ffa78 --- /dev/null +++ b/erts/emulator/hipe/hipe_gc.h @@ -0,0 +1,40 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.
See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_GC_H +#define HIPE_GC_H + +#if defined(__sparc__) +#include "hipe_sparc_gc.h" +#endif +#if defined(__i386__) +#include "hipe_x86_gc.h" +#endif +#if defined(__x86_64__) +#include "hipe_amd64_gc.h" +#endif +#if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#include "hipe_ppc_gc.h" +#endif +#if defined(__arm__) +#include "hipe_arm_gc.h" +#endif + +#endif /* HIPE_GC_H */ diff --git a/erts/emulator/hipe/hipe_mkliterals.c b/erts/emulator/hipe/hipe_mkliterals.c new file mode 100644 index 0000000000..a77aec7919 --- /dev/null +++ b/erts/emulator/hipe/hipe_mkliterals.c @@ -0,0 +1,631 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* + * $Id$ + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include <stdio.h> +#include <stddef.h> +#include <string.h> +#include <errno.h> +#include <math.h> +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "erl_process.h" +#include "error.h" +#include "erl_bits.h" +#include "erl_message.h" +/* this sucks, but the compiler needs data for all platforms */ +#include "hipe_arm_asm.h" +#undef P +#undef NSP +#undef HP +#undef TEMP_LR +#undef SAVE_CACHED_STATE +#undef RESTORE_CACHED_STATE +#undef SAVE_CONTEXT_QUICK +#undef RESTORE_CONTEXT_QUICK +#undef SAVE_CONTEXT_BIF +#undef RESTORE_CONTEXT_BIF +#undef SAVE_CONTEXT_GC +#undef RESTORE_CONTEXT_GC +#undef NR_ARG_REGS +#undef LOAD_ARG_REGS +#undef STORE_ARG_REGS +#undef TEMP_ARG0 +#undef TEMP_ARG1 +#undef TEMP_ARG2 +#undef ARG0 +#undef ARG1 +#undef ARG2 +#undef ARG3 +#undef ARG4 +#undef ARG5 +#include "hipe_ppc_asm.h" +#undef P +#undef NSP +#undef HP +#undef TEMP_LR +#undef SAVE_CACHED_STATE +#undef RESTORE_CACHED_STATE +#undef SAVE_CONTEXT_QUICK +#undef RESTORE_CONTEXT_QUICK +#undef SAVE_CONTEXT_BIF +#undef RESTORE_CONTEXT_BIF +#undef SAVE_CONTEXT_GC +#undef RESTORE_CONTEXT_GC +#undef NR_ARG_REGS +#undef LOAD_ARG_REGS +#undef STORE_ARG_REGS +#undef TEMP_ARG0 +#undef TEMP_ARG1 +#undef TEMP_ARG2 +#undef ARG0 +#undef ARG1 +#undef ARG2 +#undef ARG3 +#undef ARG4 +#undef ARG5 +#include "hipe_amd64_asm.h" +#undef P +#undef HP +#undef NSP +#undef TEMP_ARG0 +#undef TEMP_ARG1 +#undef TEMP_ARG2 +#undef ARG0 +#undef ARG1 +#undef ARG2 +#undef ARG3 +#undef ARG4 +#undef ARG5 +#undef SAVE_HP +#undef RESTORE_HP +#undef SAVE_CSP +#undef RESTORE_CSP +#undef SAVE_CACHED_STATE +#undef RESTORE_CACHED_STATE +#undef SWITCH_C_TO_ERLANG_QUICK +#undef SWITCH_ERLANG_TO_C_QUICK +#undef SWITCH_C_TO_ERLANG +#undef SWITCH_ERLANG_TO_C +#undef NR_ARG_REGS +#undef LEAF_WORDS +#undef TEMP_RV +#undef LOAD_ARG_REGS +#undef STORE_ARG_REGS +#undef NSP_CALL +#undef NSP_RETN +#undef NSP_RET0 +#include 
"hipe_x86_asm.h" +#undef P +#undef HP +#undef NSP +#undef TEMP0 +#undef TEMP1 +#undef ARG0 +#undef ARG1 +#undef ARG2 +#undef SAVE_HP +#undef RESTORE_HP +#undef SAVE_CSP +#undef RESTORE_CSP +#undef SAVE_CACHED_STATE +#undef RESTORE_CACHED_STATE +#undef SWITCH_C_TO_ERLANG_QUICK +#undef SWITCH_ERLANG_TO_C_QUICK +#undef NR_ARG_REGS +#undef LEAF_WORDS +#undef TEMP_RV +#undef LOAD_ARG_REGS +#undef STORE_ARG_REGS +#include "hipe_sparc_asm.h" +#include "erl_binary.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define field_sizeof(STRUCT, FIELD) (sizeof(((STRUCT *)0)->FIELD)) + +static const unsigned int CRCTABLE[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 
0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D, +}; + +/* + * The algorithm for calculating the 32 bit CRC checksum is based upon + * documentation and algorithms provided by Dr. Ross N. 
Williams in the + * document "A Painless Guide to CRC Error Detection Algorithms." + * This document may be downloaded from + * ftp://ftp.rocksoft.com/cliens/rocksoft/papers/crc_v3.txt + * as of 12/15/1998. Dr. Williams has placed this document and algorithms + * in the public domain. + */ +static unsigned int crc_init(void) +{ + return 0xFFFFFFFF; +} + +static unsigned int +crc_update_buf(unsigned int crc_value, + const void *buf, + unsigned int length) +{ + const unsigned char *tab; + + tab = (const unsigned char*)buf; + for (; length > 0; --length) { + unsigned char t = (crc_value >> 24) & 0xFF; + crc_value = (crc_value << 8) | *tab++; + crc_value ^= CRCTABLE[t]; + } + return crc_value; +} + +static unsigned int +crc_update_int(unsigned int crc_value, const unsigned int *p) +{ + return crc_update_buf(crc_value, p, sizeof *p); +} + +/* + * Runtime system parameters. + * Invariant for a given CPU architecture. + * (Would be invariant for 32 bit CPUs if SPARC didn't + * enlarge the def_arg_reg[] array.) 
+ */ +/* Table of (name, value) pairs; every value is stored as an unsigned int. */ +static const struct literal { + const char *name; + unsigned int value; +} literals[] = { + /* Field offsets in a process struct */ + { "P_HP", offsetof(struct process, htop) }, + { "P_HP_LIMIT", offsetof(struct process, stop) }, + { "P_OFF_HEAP_MSO", offsetof(struct process, off_heap.mso) }, + { "P_MBUF", offsetof(struct process, mbuf) }, + { "P_ID", offsetof(struct process, id) }, + { "P_FLAGS", offsetof(struct process, flags) }, + { "P_FVALUE", offsetof(struct process, fvalue) }, + { "P_FREASON", offsetof(struct process, freason) }, + { "P_FTRACE", offsetof(struct process, ftrace) }, + { "P_FCALLS", offsetof(struct process, fcalls) }, + { "P_BEAM_IP", offsetof(struct process, i) }, + { "P_ARITY", offsetof(struct process, arity) }, + { "P_ARG0", offsetof(struct process, def_arg_reg[0]) }, + { "P_ARG1", offsetof(struct process, def_arg_reg[1]) }, + { "P_ARG2", offsetof(struct process, def_arg_reg[2]) }, + { "P_ARG3", offsetof(struct process, def_arg_reg[3]) }, + { "P_ARG4", offsetof(struct process, def_arg_reg[4]) }, + { "P_ARG5", offsetof(struct process, def_arg_reg[5]) }, +#ifdef HIPE + { "P_NSP", offsetof(struct process, hipe.nsp) }, + { "P_NCALLEE", offsetof(struct process, hipe.ncallee) }, + { "P_CLOSURE", offsetof(struct process, hipe.closure) }, +#if defined(__i386__) || defined(__x86_64__) + { "P_NSP_LIMIT", offsetof(struct process, hipe.nstack) }, + { "P_CSP", offsetof(struct process, hipe.ncsp) }, +#elif defined(__sparc__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__arm__) + { "P_NSP_LIMIT", offsetof(struct process, hipe.nstack) }, + { "P_NRA", offsetof(struct process, hipe.nra) }, +#endif + { "P_NARITY", offsetof(struct process, hipe.narity) }, +#endif /* HIPE */ + + /* process flags bits */ + { "F_TIMO", F_TIMO }, + + /* freason codes */ + { "FREASON_TRAP", TRAP }, + + /* special Erlang constants */ + { "THE_NON_VALUE", THE_NON_VALUE }, + + /* funs */ +#ifdef HIPE + { "EFE_NATIVE_ADDRESS", offsetof(struct 
erl_fun_entry, native_address) }, +#endif + { "EFE_REFC", offsetof(struct erl_fun_entry, refc) }, + { "EFT_THING", offsetof(struct erl_fun_thing, thing_word) }, + + /* bit syntax */ + { "BSF_ALIGNED", BSF_ALIGNED}, + { "PB_ACTIVE_WRITER", PB_ACTIVE_WRITER}, + { "PB_IS_WRITABLE", PB_IS_WRITABLE}, + { "MB_ORIG", offsetof(struct erl_bin_match_buffer, orig) }, + { "MB_BASE", offsetof(struct erl_bin_match_buffer, base) }, + { "MB_OFFSET", offsetof(struct erl_bin_match_buffer, offset) }, + { "MB_SIZE", offsetof(struct erl_bin_match_buffer, size) }, + { "PROC_BIN_THING_WORD", offsetof(struct proc_bin, thing_word) }, + { "PROC_BIN_BINSIZE", offsetof(struct proc_bin, size) }, + { "PROC_BIN_NEXT", offsetof(struct proc_bin, next) }, + { "PROC_BIN_VAL", offsetof(struct proc_bin, val) }, + { "PROC_BIN_BYTES", offsetof(struct proc_bin, bytes) }, + { "PROC_BIN_FLAGS", offsetof(struct proc_bin, flags) }, + { "PROC_BIN_WORDSIZE", PROC_BIN_SIZE}, + { "SUB_BIN_THING_WORD", offsetof(struct erl_sub_bin, thing_word) }, + { "SUB_BIN_BINSIZE", offsetof(struct erl_sub_bin, size) }, + { "SUB_BIN_BITSIZE", offsetof(struct erl_sub_bin, bitsize) }, + { "SUB_BIN_OFFS", offsetof(struct erl_sub_bin, offs) }, + { "SUB_BIN_BITOFFS", offsetof(struct erl_sub_bin, bitoffs) }, + { "SUB_BIN_WRITABLE", offsetof(struct erl_sub_bin, is_writable) }, + { "SUB_BIN_ORIG", offsetof(struct erl_sub_bin, orig) }, + { "SUB_BIN_WORDSIZE", ERL_SUB_BIN_SIZE}, + { "HEAP_BIN_THING_WORD", offsetof(struct erl_heap_bin, thing_word) }, + { "HEAP_BIN_SIZE", offsetof(struct erl_heap_bin, size) }, + { "HEAP_BIN_DATA", offsetof(struct erl_heap_bin, data) }, + { "BINARY_ORIG_SIZE", offsetof(struct binary, orig_size) }, + { "BINARY_ORIG_BYTES", offsetof(struct binary, orig_bytes) }, + { "MAX_HEAP_BIN_SIZE", ERL_ONHEAP_BIN_LIMIT}, + { "MS_THING_WORD", offsetof(struct erl_bin_match_struct, thing_word)}, + { "MS_MATCHBUFFER", offsetof(struct erl_bin_match_struct, mb)}, + { "MS_SAVEOFFSET", offsetof(struct erl_bin_match_struct, 
save_offset)}, + + { "MS_MIN_SIZE", ERL_BIN_MATCHSTATE_SIZE(0)}, + + { "MB_ORIG_SIZE", field_sizeof(struct erl_bin_match_buffer, orig) }, + { "MB_BASE_SIZE", field_sizeof(struct erl_bin_match_buffer, base) }, + { "MB_OFFSET_SIZE", field_sizeof(struct erl_bin_match_buffer, offset) }, + { "MB_SIZE_SIZE", field_sizeof(struct erl_bin_match_buffer, size) }, + { "PROC_BIN_THING_WORD_SIZE", field_sizeof(struct proc_bin, thing_word) }, + { "PROC_BIN_BINSIZE_SIZE", field_sizeof(struct proc_bin, size) }, + { "PROC_BIN_NEXT_SIZE", field_sizeof(struct proc_bin, next) }, + { "PROC_BIN_VAL_SIZE", field_sizeof(struct proc_bin, val) }, + { "PROC_BIN_BYTES_SIZE", field_sizeof(struct proc_bin, bytes) }, + { "PROC_BIN_FLAGS_SIZE", field_sizeof(struct proc_bin, flags) }, + { "SUB_BIN_THING_WORD_SIZE", field_sizeof(struct erl_sub_bin, thing_word) }, + { "SUB_BIN_BINSIZE_SIZE", field_sizeof(struct erl_sub_bin, size) }, + { "SUB_BIN_BITSIZE_SIZE", field_sizeof(struct erl_sub_bin, bitsize) }, + { "SUB_BIN_OFFS_SIZE", field_sizeof(struct erl_sub_bin, offs) }, + { "SUB_BIN_BITOFFS_SIZE", field_sizeof(struct erl_sub_bin, bitoffs) }, + { "SUB_BIN_WRITABLE_SIZE", field_sizeof(struct erl_sub_bin, is_writable) }, + { "SUB_BIN_ORIG_SIZE", field_sizeof(struct erl_sub_bin, orig) }, + { "HEAP_BIN_THING_WORD_SIZE", field_sizeof(struct erl_heap_bin, thing_word) }, + { "HEAP_BIN_SIZE_SIZE", field_sizeof(struct erl_heap_bin, size) }, + { "HEAP_BIN_DATA_SIZE", field_sizeof(struct erl_heap_bin, data) }, + { "BINARY_ORIG_SIZE_SIZE", field_sizeof(struct binary, orig_size) }, + { "BINARY_ORIG_BYTES_SIZE", field_sizeof(struct binary, orig_bytes) }, + { "MS_THING_WORD_SIZE", field_sizeof(struct erl_bin_match_struct, thing_word)}, + { "MS_SAVEOFFSET_SIZE", field_sizeof(struct erl_bin_match_struct, save_offset)}, + + /* messages */ + { "P_MSG_FIRST", offsetof(struct process, msg.first) }, + { "P_MSG_SAVE", offsetof(struct process, msg.save) }, + { "MSG_NEXT", offsetof(struct erl_mesg, next) }, + + /* ARM */ + { 
"ARM_LEAF_WORDS", ARM_LEAF_WORDS }, + { "ARM_NR_ARG_REGS", ARM_NR_ARG_REGS }, + /* 1 only when both __arm__ and __ARMEB__ are defined, otherwise 0 */ + { "ARM_IS_BIG_ENDIAN", +#if defined(__arm__) && defined(__ARMEB__) + 1 +#else + 0 +#endif + }, + + /* PowerPC */ + { "PPC_LEAF_WORDS", PPC_LEAF_WORDS }, + { "PPC_NR_ARG_REGS", PPC_NR_ARG_REGS }, + + /* Amd64 */ + { "AMD64_LEAF_WORDS", AMD64_LEAF_WORDS }, + { "AMD64_NR_ARG_REGS", AMD64_NR_ARG_REGS }, +#if AMD64_HP_IN_REGISTER + { "AMD64_HP_IN_REGISTER", 1 }, + { "AMD64_HEAP_POINTER", AMD64_HEAP_POINTER }, +#endif +#if AMD64_FCALLS_IN_REGISTER + { "AMD64_FCALLS_IN_REGISTER", 1 }, + { "AMD64_FCALLS_REGISTER", AMD64_FCALLS_REGISTER }, +#endif +#if AMD64_HEAP_LIMIT_IN_REGISTER + { "AMD64_HEAP_LIMIT_IN_REGISTER", 1 }, + { "AMD64_HEAP_LIMIT_REGISTER", AMD64_HEAP_LIMIT_REGISTER }, +#endif +#if AMD64_SIMULATE_NSP + { "AMD64_SIMULATE_NSP", 1 }, +#endif + + /* x86 */ + { "X86_LEAF_WORDS", X86_LEAF_WORDS }, + { "X86_NR_ARG_REGS", X86_NR_ARG_REGS }, + /* I know this sucks.. it is only temporary, though. */ + { "X86_NR_RET_REGS", 3}, +#if X86_HP_IN_ESI + { "X86_HP_IN_ESI", 1 }, +#endif +#if X86_SIMULATE_NSP + { "X86_SIMULATE_NSP", 1 }, +#endif + + /* SPARC */ + { "SPARC_LEAF_WORDS", SPARC_LEAF_WORDS }, + { "SPARC_NR_ARG_REGS", SPARC_NR_ARG_REGS}, +}; + +/* Number of entries in literals[]. */ +#define NR_LITERALS ARRAY_SIZE(literals) + +/* + * Runtime system parameters that generate Erlang atoms. + */ +static const struct atom_literal { + const char *name; + const char *value; +} atom_literals[] = { + /* "big" only when both __arm__ and __ARMEB__ are defined, otherwise "little" */ + { "ARM_ENDIANESS", +#if defined(__arm__) && defined(__ARMEB__) + "big" +#else + "little" +#endif + }, +}; + +/* Number of entries in atom_literals[]. */ +#define NR_ATOM_LITERALS ARRAY_SIZE(atom_literals) + +/* + * Runtime system parameters. + * These depend on configuration options such as heap architecture. + * The compiler accesses these through hipe_bifs:get_rts_param/1. 
+ */ +static const struct rts_param { + unsigned int nr; + const char *name; + unsigned int is_defined; + unsigned int value; +} rts_params[] = { + { 1, "P_OFF_HEAP_FUNS", +#if !defined(HYBRID) + 1, offsetof(struct process, off_heap.funs) +#endif + }, + + { 4, "EFT_NEXT", +#if !defined(HYBRID) + 1, offsetof(struct erl_fun_thing, next) +#endif + }, + + /* These are always defined, but their values depend on the + presence or absence of struct erl_fun_thing's "next" field. */ + { 5, "EFT_CREATOR", 1, offsetof(struct erl_fun_thing, creator) }, + { 6, "EFT_FE", 1, offsetof(struct erl_fun_thing, fe) }, +#ifdef HIPE + { 7, "EFT_NATIVE_ADDRESS", 1, offsetof(struct erl_fun_thing, native_address) }, +#endif + { 8, "EFT_ARITY", 1, offsetof(struct erl_fun_thing, arity) }, + { 9, "EFT_NUM_FREE", 1, offsetof(struct erl_fun_thing, num_free) }, + { 10, "EFT_ENV", 1, offsetof(struct erl_fun_thing, env[0]) }, + { 11, "ERL_FUN_SIZE", 1, ERL_FUN_SIZE }, + + { 12, "P_SCHED_DATA", +#ifdef ERTS_SMP + 1, offsetof(struct process, scheduler_data) +#endif + }, + { 14, "P_FP_EXCEPTION", +#if !defined(NO_FPE_SIGNALS) + 1, offsetof(struct process, fp_exception) +#endif + }, + /* This flag is always defined, but its value is configuration-dependent. */ + { 15, "ERTS_IS_SMP", + 1, +#if defined(ERTS_SMP) + 1 +#else + 0 +#endif + }, + /* This parameter is always defined, but its value depends on ERTS_SMP. 
*/ + { 19, "MSG_MESSAGE", + 1, offsetof(struct erl_mesg, m[0]) + }, + /* highest entry ever used == 21 */ +}; + +#define NR_PARAMS ARRAY_SIZE(rts_params) + +static unsigned int literals_crc; +static unsigned int system_crc; + +static void compute_crc(void) +{ + unsigned int crc_value; + unsigned int i; + + crc_value = crc_init(); + for (i = 0; i < NR_LITERALS; ++i) + crc_value = crc_update_int(crc_value, &literals[i].value); + crc_value &= 0x07FFFFFF; + literals_crc = crc_value; + for (i = 0; i < NR_PARAMS; ++i) + if (rts_params[i].is_defined) + crc_value = crc_update_int(crc_value, &rts_params[i].value); + crc_value &= 0x07FFFFFF; + system_crc = crc_value; +} + +static void c_define_literal(FILE *fp, const struct literal *literal) +{ + fprintf(fp, "#define %s %u\n", literal->name, literal->value); +} + +static void e_define_literal(FILE *fp, const struct literal *literal) +{ + fprintf(fp, "-define(%s, %u).\n", literal->name, literal->value); +} + +static void print_literals(FILE *fp, void (*print_literal)(FILE*, const struct literal*)) +{ + unsigned int i; + + for (i = 0; i < NR_LITERALS; ++i) + (*print_literal)(fp, &literals[i]); +} + +static void e_define_atom_literal(FILE *fp, const struct atom_literal *atom_literal) +{ + fprintf(fp, "-define(%s, %s).\n", atom_literal->name, atom_literal->value); +} + +static void print_atom_literals(FILE *fp, void (*print_atom_literal)(FILE*, const struct atom_literal*)) +{ + unsigned int i; + + for (i = 0; i < NR_ATOM_LITERALS; ++i) + (*print_atom_literal)(fp, &atom_literals[i]); +} + +static void c_define_param(FILE *fp, const struct rts_param *param) +{ + if (param->is_defined) + fprintf(fp, "#define %s %u\n", param->name, param->value); +} + +static void c_case_param(FILE *fp, const struct rts_param *param) +{ + fprintf(fp, " \\\n"); + fprintf(fp, "\tcase %u: ", param->nr); + if (param->is_defined) + fprintf(fp, "value = %u", param->value); + else + fprintf(fp, "is_defined = 0"); + fprintf(fp, "; break;"); +} + +static 
void e_define_param(FILE *fp, const struct rts_param *param) +{ + fprintf(fp, "-define(%s, hipe_bifs:get_rts_param(%u)).\n", param->name, param->nr); +} + +static void print_params(FILE *fp, void (*print_param)(FILE*,const struct rts_param*)) +{ + unsigned int i; + + for (i = 0; i < NR_PARAMS; ++i) + (*print_param)(fp, &rts_params[i]); +} + +static int do_c(FILE *fp) +{ + fprintf(fp, "/* File: hipe_literals.h, generated by hipe_mkliterals */\n"); + fprintf(fp, "#ifndef __HIPE_LITERALS_H__\n"); + fprintf(fp, "#define __HIPE_LITERALS_H__\n\n"); + print_literals(fp, c_define_literal); + print_params(fp, c_define_param); + fprintf(fp, "#define HIPE_LITERALS_CRC %uU\n", literals_crc); + fprintf(fp, "#define HIPE_SYSTEM_CRC %uU\n", system_crc); + fprintf(fp, "\n"); + fprintf(fp, "#define RTS_PARAMS_CASES"); + print_params(fp, c_case_param); + fprintf(fp, "\n#endif\n"); + return 0; +} + +static int do_e(FILE *fp) +{ + fprintf(fp, "%%%% File: hipe_literals.hrl, generated by hipe_mkliterals"); + fprintf(fp, "\n\n"); + print_literals(fp, e_define_literal); + fprintf(fp, "\n"); + print_atom_literals(fp, e_define_atom_literal); + fprintf(fp, "\n"); + print_params(fp, e_define_param); + fprintf(fp, "\n"); + fprintf(fp, "-define(HIPE_SYSTEM_CRC, hipe_bifs:system_crc(%u)).\n", literals_crc); + return 0; +} + +int main(int argc, const char **argv) +{ + compute_crc(); + if (argc == 2) { + if (strcmp(argv[1], "-c") == 0) + return do_c(stdout); + if (strcmp(argv[1], "-e") == 0) + return do_e(stdout); + } + fprintf(stderr, "usage: %s [-c | -e] > output-file\n", argv[0]); + return 1; +} diff --git a/erts/emulator/hipe/hipe_mode_switch.c b/erts/emulator/hipe/hipe_mode_switch.c new file mode 100644 index 0000000000..e5de244d25 --- /dev/null +++ b/erts/emulator/hipe/hipe_mode_switch.c @@ -0,0 +1,641 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. 
+ * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_mode_switch.c + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "erl_process.h" +#include "beam_load.h" /* which includes beam_opcodes.h */ +#include "beam_catches.h" +#include "hipe_mode_switch.h" +#include "bif.h" +#include "error.h" +#include "hipe_stack.h" +#include "hipe_bif0.h" /* hipe_mfa_info_table_init() */ + +/* + * Internal debug support. + * #define HIPE_DEBUG to the desired debug level: + * 0 no checks + * 1 check PCB consistency at mode-switches + * 2 log commands and results at mode-switches + * 3 log commands, results, and PCB contents at mode-switches + * + * TODO: check PCB consistency at native BIF calls + */ +int hipe_modeswitch_debug = 0; + +#define HIPE_DEBUG 0 + +#if HIPE_DEBUG > 1 /* include DPRINTF() logging */ + +#define DPRINTF(fmt, args...) 
\ +do { \ + if (hipe_modeswitch_debug > 0) { \ + printf("%s, line %u: " fmt "\r\n", __FUNCTION__, __LINE__ , ##args); \ + fflush(stdout); \ + } \ +} while (0) + +static const char *code_str(unsigned code) +{ + static const char *cmd_str[] = { + "call from beam", + "return from beam", + "throw from beam", + "resume from beam", + "return to beam", + "call to beam", + "throw to beam", + "suspend to beam", + "wait from native", + "wait_timeout from native", + "trap from native", + "call closure from beam", + "call closure to beam", + }; + unsigned cmd = code & 0xFF; + + if (cmd < (sizeof(cmd_str)/sizeof(cmd_str[0]))) + return cmd_str[cmd]; + else + return "???"; +} + +#else /* HIPE_DEBUG > 1 */ + +#define DPRINTF(fmt, args...) do{}while(0) + +#endif /* HIPE_DEBUG > 1 */ + +#if HIPE_DEBUG > 0 /* include HIPE_ASSERT and PCB checking */ + +static void __noreturn +hipe_abort(const char *expr, const char *file, unsigned line) +{ + erl_exit(1, "ASSERTION FAILED, file %s, line %u: %s\r\n", file, line, expr); +} + +#define HIPE_ASSERT3(expr, file, line) \ +do { \ + if (!(expr)) \ + hipe_abort(#expr, file, line); \ +} while (0) +#define HIPE_ASSERT(expr) HIPE_ASSERT3(expr, __FILE__, __LINE__) + +void hipe_check_pcb(Process *p, const char *file, unsigned line) +{ +#if HIPE_DEBUG > 2 + if (hipe_modeswitch_debug > 0) { + printf("%s, line %u: p %p = {htop %p, stop %p, nstack %p, nsp %p, nstend %p}\r\n", file, line, p, p->htop, p->stop, p->hipe.nstack, p->hipe.nsp, p->hipe.nstend); + } +#endif + HIPE_ASSERT3(p != NULL, file, line); + HIPE_ASSERT3(p->htop <= p->stop, file, line); + HIPE_ASSERT3(p->hipe.nstack <= p->hipe.nstend, file, line); + HIPE_ASSERT3(p->hipe.nsp >= p->hipe.nstack, file, line); + HIPE_ASSERT3(p->hipe.nsp <= p->hipe.nstend, file, line); +} +#define HIPE_CHECK_PCB(P) hipe_check_pcb((P), __FILE__, __LINE__) + +#else /* HIPE_DEBUG > 0 */ + +#define HIPE_ASSERT(expr) do{}while(0) +#define HIPE_CHECK_PCB(P) do{}while(0) + +#endif /* HIPE_DEBUG > 0 */ + +/* ensure that 
at least nwords words are available on the native stack */ +static void hipe_check_nstack(Process *p, unsigned nwords); + +#if defined(__sparc__) +#include "hipe_sparc_glue.h" +#elif defined(__i386__) +#include "hipe_x86_glue.h" +#elif defined(__x86_64__) +#include "hipe_amd64_glue.h" +#elif defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#include "hipe_ppc_glue.h" +#elif defined(__arm__) +#include "hipe_arm_glue.h" +#endif + +#define BeamOpCode(Op) ((Uint)BeamOp(Op)) + +Uint hipe_beam_pc_return[1]; /* needed in hipe_debug.c */ +Uint hipe_beam_pc_throw[1]; /* needed in hipe_debug.c */ +Uint hipe_beam_pc_resume[1]; /* needed by hipe_set_timeout() */ +static Eterm hipe_beam_catch_throw; + +void hipe_mode_switch_init(void) +{ + hipe_arch_glue_init(); + + hipe_beam_pc_return[0] = BeamOpCode(op_hipe_trap_return); + hipe_beam_pc_throw[0] = BeamOpCode(op_hipe_trap_throw); + hipe_beam_pc_resume[0] = BeamOpCode(op_hipe_trap_resume); + + hipe_beam_catch_throw = + make_catch(beam_catches_cons(hipe_beam_pc_throw, BEAM_CATCHES_NIL)); + + hipe_mfa_info_table_init(); +} + +void hipe_set_call_trap(Uint *bfun, void *nfun, int is_closure) +{ + HIPE_ASSERT(bfun[-5] == BeamOpCode(op_i_func_info_IaaI)); + bfun[0] = + is_closure + ? 
BeamOpCode(op_hipe_trap_call_closure) + : BeamOpCode(op_hipe_trap_call); + bfun[-4] = (Uint)nfun; +} + +static __inline__ void +hipe_push_beam_trap_frame(Process *p, Eterm reg[], unsigned arity) +{ + /* ensure that at least 2 words are available on the BEAM stack */ + if ((p->stop - 2) < p->htop) { + DPRINTF("calling gc to increase BEAM stack size"); + p->fcalls -= erts_garbage_collect(p, 2, reg, arity); + } + p->stop -= 2; + p->stop[1] = hipe_beam_catch_throw; + p->stop[0] = make_cp(p->cp); + ++p->catches; + p->cp = hipe_beam_pc_return; +} + +static __inline__ void hipe_pop_beam_trap_frame(Process *p) +{ + p->cp = cp_val(p->stop[0]); + --p->catches; + p->stop += 2; +} + +Process *hipe_mode_switch(Process *p, unsigned cmd, Eterm reg[]) +{ + unsigned result; +#if NR_ARG_REGS > 5 + /* When NR_ARG_REGS > 5, we need to protect the process' input + reduction count (which BEAM stores in def_arg_reg[5]) from + being clobbered by the arch glue code. */ + Eterm reds_in = p->def_arg_reg[5]; +#endif +#if NR_ARG_REGS > 4 + Eterm o_reds = p->def_arg_reg[4]; +#endif + + p->i = NULL; + + DPRINTF("cmd == %#x (%s)", cmd, code_str(cmd)); + HIPE_CHECK_PCB(p); + p->arity = 0; + switch (cmd & 0xFF) { + case HIPE_MODE_SWITCH_CMD_CALL: { + /* BEAM calls a native code function */ + unsigned arity = cmd >> 8; + + /* p->hipe.ncallee set in beam_emu */ + if (p->cp == hipe_beam_pc_return) { + /* Native called BEAM, which now tailcalls native. 
*/ + hipe_pop_beam_trap_frame(p); + result = hipe_tailcall_to_native(p, arity, reg); + break; + } + DPRINTF("calling %#lx/%u", (long)p->hipe.ncallee, arity); + result = hipe_call_to_native(p, arity, reg); + break; + } + case HIPE_MODE_SWITCH_CMD_CALL_CLOSURE: { + /* BEAM calls a native code closure */ + unsigned arity = cmd >> 8; /* #formals + #fvs (closure not counted) */ + Eterm fun; + ErlFunThing *funp; + + /* drop the fvs, move the closure, correct arity */ + fun = reg[arity]; + HIPE_ASSERT(is_fun(fun)); + funp = (ErlFunThing*)fun_val(fun); + HIPE_ASSERT(funp->num_free <= arity); + arity -= funp->num_free; /* arity == #formals */ + reg[arity] = fun; + ++arity; /* correct for having added the closure */ + /* HIPE_ASSERT(p->hipe.ncallee == (void(*)(void))funp->native_address); */ + + /* just like a normal call from now on */ + + /* p->hipe.ncallee set in beam_emu */ + if (p->cp == hipe_beam_pc_return) { + /* Native called BEAM, which now tailcalls native. */ + hipe_pop_beam_trap_frame(p); + result = hipe_tailcall_to_native(p, arity, reg); + break; + } + DPRINTF("calling %#lx/%u", (long)p->hipe.ncallee, arity); + result = hipe_call_to_native(p, arity, reg); + break; + } + case HIPE_MODE_SWITCH_CMD_THROW: { + /* BEAM just executed hipe_beam_pc_throw[] */ + /* Native called BEAM, which now throws an exception back to native. */ + DPRINTF("beam throws freason %#lx fvalue %#lx", p->freason, p->fvalue); + hipe_pop_beam_trap_frame(p); + do_throw_to_native: + p->def_arg_reg[0] = exception_tag[GET_EXC_CLASS(p->freason)]; + hipe_find_handler(p); + result = hipe_throw_to_native(p); + break; + } + case HIPE_MODE_SWITCH_CMD_RETURN: { + /* BEAM just executed hipe_beam_pc_return[] */ + /* Native called BEAM, which now returns back to native. 
*/ + /* pop trap frame off estack */ + hipe_pop_beam_trap_frame(p); + p->def_arg_reg[0] = reg[0]; + result = hipe_return_to_native(p); + break; + } + do_resume: + case HIPE_MODE_SWITCH_CMD_RESUME: { + /* BEAM just executed hipe_beam_pc_resume[] */ + /* BEAM called native, which suspended. */ + if (p->flags & F_TIMO) { + /* XXX: The process will immediately execute 'clear_timeout', + repeating these two statements. Remove them? */ + p->flags &= ~F_TIMO; + JOIN_MESSAGE(p); + p->def_arg_reg[0] = 0; /* make_small(0)? */ + } else + p->def_arg_reg[0] = 1; /* make_small(1)? */ + result = hipe_return_to_native(p); + break; + } + default: + erl_exit(1, "hipe_mode_switch: cmd %#x\r\n", cmd); + } + do_return_from_native: + DPRINTF("result == %#x (%s)", result, code_str(result)); + HIPE_CHECK_PCB(p); + switch (result) { + case HIPE_MODE_SWITCH_RES_RETURN: { + hipe_return_from_native(p); + reg[0] = p->def_arg_reg[0]; + DPRINTF("returning with r(0) == %#lx", reg[0]); + break; + } + case HIPE_MODE_SWITCH_RES_THROW: { + DPRINTF("native throws freason %#lx fvalue %#lx", p->freason, p->fvalue); + hipe_throw_from_native(p); + break; + } + case HIPE_MODE_SWITCH_RES_TRAP: { + /* + * Native code called a BIF, which "failed" with a TRAP to BEAM. + * Prior to returning, the BIF stored (see BIF_TRAP<N>): + + * the callee's address in p->def_arg_reg[3] + * the callee's parameters in p->def_arg_reg[0..2] + * the callee's arity in p->arity (for BEAM gc purposes) + * + * We need to remove the BIF's parameters from the native + * stack: to this end hipe_${ARCH}_glue.S stores the BIF's + * arity in p->hipe.narity. + */ + unsigned int i, is_recursive, callee_arity; + + /* Save p->arity, then update it with the original BIF's arity. + Get rid of any stacked parameters in that call. */ + /* XXX: hipe_call_from_native_is_recursive() copies data to + reg[], which is useless in the TRAP case. Maybe write a + specialised hipe_trap_from_native_is_recursive() later. 
*/ + callee_arity = p->arity; + p->arity = p->hipe.narity; /* caller's arity */ + is_recursive = hipe_call_from_native_is_recursive(p, reg); + + p->i = (Eterm *)(p->def_arg_reg[3]); + p->arity = callee_arity; + + for (i = 0; i < p->arity; ++i) + reg[i] = p->def_arg_reg[i]; + + if (is_recursive) + hipe_push_beam_trap_frame(p, reg, p->arity); + + result = HIPE_MODE_SWITCH_RES_CALL; + break; + } + case HIPE_MODE_SWITCH_RES_CALL: { + /* Native code calls or tailcalls BEAM. + * + * p->i is the callee's BEAM code + * p->arity is the callee's arity + * p->def_arg_reg[] contains the register parameters + * p->hipe.nsp[] contains the stacked parameters + */ + if (hipe_call_from_native_is_recursive(p, reg)) { + /* BEAM called native, which now calls BEAM */ + hipe_push_beam_trap_frame(p, reg, p->arity); + } + break; + } + case HIPE_MODE_SWITCH_RES_CALL_CLOSURE: { + /* Native code calls or tailcalls a closure in BEAM + * + * In native code a call to a closure of arity n looks like + * F(A1, ..., AN, Closure), + * The BEAM code for a closure expects to get: + * F(A1, ..., AN, FV1, ..., FVM, Closure) + * (Where Ai is argument i and FVj is free variable j) + * + * p->hipe.closure contains the closure + * p->def_arg_reg[] contains the register parameters + * p->hipe.nsp[] contains the stacked parameters + */ + ErlFunThing *closure; + unsigned num_free, arity, i, is_recursive; + + HIPE_ASSERT(is_fun(p->hipe.closure)); + closure = (ErlFunThing*)fun_val(p->hipe.closure); + num_free = closure->num_free; + arity = closure->fe->arity; + + /* Store the arity in p->arity for the stack popping. */ + /* Note: we already have the closure so only need to move arity + values to reg[]. However, there are arity+1 parameters in the + native code state that need to be removed. */ + p->arity = arity+1; /* +1 for the closure */ + + /* Get parameters, don't do GC just yet. */ + is_recursive = hipe_call_from_native_is_recursive(p, reg); + + if ((Sint)closure->fe->address[-1] < 0) { + /* Unloaded. 
Let beam_emu.c:call_fun() deal with it. */ + result = HIPE_MODE_SWITCH_RES_CALL_CLOSURE; + } else { + /* The BEAM code is present. Prepare to call it. */ + + /* Append the free vars after the actual parameters. */ + for (i = 0; i < num_free; ++i) + reg[arity+i] = closure->env[i]; + + /* Update arity to reflect the new parameters. */ + arity += i; + + /* Make a call to the closure's BEAM code. */ + p->i = closure->fe->address; + + /* Change result code to the faster plain CALL type. */ + result = HIPE_MODE_SWITCH_RES_CALL; + } + /* Append the closure as the last parameter. Don't increment arity. */ + reg[arity] = p->hipe.closure; + + if (is_recursive) { + /* BEAM called native, which now calls BEAM. + Need to put a trap-frame on the beam stack. + This may cause GC, which is safe now that + the arguments, free vars, and most + importantly the closure, all are in reg[]. */ + hipe_push_beam_trap_frame(p, reg, arity+1); + } + break; + } + case HIPE_MODE_SWITCH_RES_SUSPEND: { + p->i = hipe_beam_pc_resume; + p->arity = 0; + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + if (p->status != P_SUSPENDED) + erts_add_to_runq(p); + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + goto do_schedule; + } + case HIPE_MODE_SWITCH_RES_WAIT: + case HIPE_MODE_SWITCH_RES_WAIT_TIMEOUT: { + /* same semantics, different debug trace messages */ +#ifdef ERTS_SMP + /* XXX: BEAM has different entries for the locked and unlocked + cases. HiPE doesn't, so we must check dynamically. 
*/ + if (p->hipe_smp.have_receive_locks) + p->hipe_smp.have_receive_locks = 0; + else + erts_smp_proc_lock(p, ERTS_PROC_LOCKS_MSG_RECEIVE); +#endif + p->i = hipe_beam_pc_resume; + p->arity = 0; + p->status = P_WAITING; + erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_MSG_RECEIVE); + do_schedule: + { +#if !(NR_ARG_REGS > 5) + int reds_in = p->def_arg_reg[5]; +#endif + p = schedule(p, reds_in - p->fcalls); +#ifdef ERTS_SMP + p->hipe_smp.have_receive_locks = 0; + reg = p->scheduler_data->save_reg; +#endif + } + { + Eterm *argp; + int i; + + argp = p->arg_reg; + for (i = p->arity; --i >= 0;) + reg[i] = argp[i]; + } + { +#if !(NR_ARG_REGS > 5) + Eterm reds_in; +#endif +#if !(NR_ARG_REGS > 4) + Eterm o_reds; +#endif + + reds_in = p->fcalls; + o_reds = 0; + if (ERTS_PROC_GET_SAVED_CALLS_BUF(p)) { + o_reds = reds_in; + reds_in = 0; + p->fcalls = 0; + } + p->def_arg_reg[4] = o_reds; + p->def_arg_reg[5] = reds_in; + if (p->i == hipe_beam_pc_resume) { + p->i = NULL; + p->arity = 0; + goto do_resume; + } + } + HIPE_CHECK_PCB(p); + result = HIPE_MODE_SWITCH_RES_CALL; + p->def_arg_reg[3] = result; + return p; + } + case HIPE_MODE_SWITCH_RES_APPLY: { + Eterm mfa[3], args; + unsigned int arity; + void *address; + + hipe_pop_params(p, 3, &mfa[0]); + + /* Unroll the arglist onto reg[]. 
*/ + args = mfa[2]; + arity = 0; + while (is_list(args)) { + if (arity < 255) { + reg[arity++] = CAR(list_val(args)); + args = CDR(list_val(args)); + } else + goto do_apply_fail; + } + if (is_not_nil(args)) + goto do_apply_fail; + + /* find a native code entry point for {M,F,A} for a remote call */ + address = hipe_get_remote_na(mfa[0], mfa[1], arity); + if (!address) + goto do_apply_fail; + p->hipe.ncallee = (void(*)(void)) address; + result = hipe_tailcall_to_native(p, arity, reg); + goto do_return_from_native; + do_apply_fail: + p->freason = BADARG; + goto do_throw_to_native; + } + default: + erl_exit(1, "hipe_mode_switch: result %#x\r\n", result); + } + HIPE_CHECK_PCB(p); + p->def_arg_reg[3] = result; +#if NR_ARG_REGS > 4 + p->def_arg_reg[4] = o_reds; +#endif +#if NR_ARG_REGS > 5 + p->def_arg_reg[5] = reds_in; +#endif + return p; +} + +#define HIPE_INITIAL_NSTACK_SIZE 128 + +/* PRE: size is zero or a power of two */ +static unsigned hipe_next_nstack_size(unsigned size) +{ + return size ? 
size * 2 : HIPE_INITIAL_NSTACK_SIZE; +} + +#if 0 && defined(HIPE_NSTACK_GROWS_UP) +#define hipe_nstack_avail(p) ((p)->hipe.nstend - (p)->hipe.nsp) +void hipe_inc_nstack(Process *p) +{ + Eterm *old_nstack = p->hipe.nstack; + unsigned old_size = p->hipe.nstend - old_nstack; + unsigned new_size = hipe_next_nstack_size(old_size); + Eterm *new_nstack = erts_realloc(ERTS_ALC_T_HIPE, + (char *) old_nstack, + new_size*sizeof(Eterm)); + p->hipe.nstend = new_nstack + new_size; + if (new_nstack != old_nstack) { + p->hipe.nsp = new_nstack + (p->hipe.nsp - old_nstack); + p->hipe.nstack = new_nstack; + if (p->hipe.nstgraylim) + p->hipe.nstgraylim = + new_nstack + (p->hipe.nstgraylim - old_nstack); + if (p->hipe.nstblacklim) + p->hipe.nstblacklim = + new_nstack + (p->hipe.nstblacklim - old_nstack); + } +} +#endif + +#if defined(HIPE_NSTACK_GROWS_DOWN) +#define hipe_nstack_avail(p) ((unsigned)((p)->hipe.nsp - (p)->hipe.nstack)) +void hipe_inc_nstack(Process *p) +{ + unsigned old_size = p->hipe.nstend - p->hipe.nstack; + unsigned new_size = hipe_next_nstack_size(old_size); + Eterm *new_nstack = erts_alloc(ERTS_ALC_T_HIPE, new_size*sizeof(Eterm)); + unsigned used_size = p->hipe.nstend - p->hipe.nsp; + + sys_memcpy(new_nstack+new_size-used_size, p->hipe.nsp, used_size*sizeof(Eterm)); + if (p->hipe.nstgraylim) + p->hipe.nstgraylim = new_nstack + new_size - (p->hipe.nstend - p->hipe.nstgraylim); + if (p->hipe.nstblacklim) + p->hipe.nstblacklim = new_nstack + new_size - (p->hipe.nstend - p->hipe.nstblacklim); + if (p->hipe.nstack) + erts_free(ERTS_ALC_T_HIPE, p->hipe.nstack); + p->hipe.nstack = new_nstack; + p->hipe.nstend = new_nstack + new_size; + p->hipe.nsp = new_nstack + new_size - used_size; +} +#endif + +static void hipe_check_nstack(Process *p, unsigned nwords) +{ + while (hipe_nstack_avail(p) < nwords) + hipe_inc_nstack(p); +} + +void hipe_set_closure_stub(ErlFunEntry *fe, unsigned num_free) +{ + unsigned arity; + + arity = fe->arity; + fe->native_address = (Eterm*) 
hipe_closure_stub_address(arity); +} + +Eterm hipe_build_stacktrace(Process *p, struct StackTrace *s) +{ + int depth, i; + Uint heap_size; + Eterm *hp, *hp_end, mfa, m, f, head, *next_p, next; + const void *ra; + unsigned int a; + + depth = s->depth; + if (depth < 1) + return NIL; + + heap_size = 6 * depth; /* each [{M,F,A}|_] is 2+4 == 6 words */ + hp = HAlloc(p, heap_size); + hp_end = hp + heap_size; + + head = NIL; + next_p = &head; + + for (i = 0; i < depth; ++i) { + ra = (const void*)s->trace[i]; + if (!hipe_find_mfa_from_ra(ra, &m, &f, &a)) + continue; + mfa = TUPLE3(hp, m, f, make_small(a)); + hp += 4; + next = CONS(hp, mfa, NIL); + *next_p = next; + next_p = &CDR(list_val(next)); + hp += 2; + } + HRelease(p, hp_end, hp); + return head; +} diff --git a/erts/emulator/hipe/hipe_mode_switch.h b/erts/emulator/hipe/hipe_mode_switch.h new file mode 100644 index 0000000000..187b9145e2 --- /dev/null +++ b/erts/emulator/hipe/hipe_mode_switch.h @@ -0,0 +1,66 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_mode_switch.h + */ +#ifndef HIPE_MODE_SWITCH_H +#define HIPE_MODE_SWITCH_H + +/* command codes for beam_emu -> hipe_mode_switch() call */ +#define HIPE_MODE_SWITCH_CMD_CALL 0 +#define HIPE_MODE_SWITCH_CMD_RETURN 1 +#define HIPE_MODE_SWITCH_CMD_THROW 2 +#define HIPE_MODE_SWITCH_CMD_RESUME 3 + +/* result codes for beam_emu <- hipe_mode_switch() return */ +#define HIPE_MODE_SWITCH_RES_RETURN 4 +#define HIPE_MODE_SWITCH_RES_CALL 5 +#define HIPE_MODE_SWITCH_RES_THROW 6 + +/* additional result codes for hipe_mode_switch() <- native return */ +#define HIPE_MODE_SWITCH_RES_SUSPEND 7 +#define HIPE_MODE_SWITCH_RES_WAIT 8 +#define HIPE_MODE_SWITCH_RES_WAIT_TIMEOUT 9 +#define HIPE_MODE_SWITCH_RES_TRAP 10 + +#define HIPE_MODE_SWITCH_CMD_CALL_CLOSURE 11 /* BEAM -> mode_switch */ +#define HIPE_MODE_SWITCH_RES_CALL_CLOSURE 12 /* mode_switch <- native */ + +#define HIPE_MODE_SWITCH_RES_APPLY 13 /* mode_switch <- native */ + +#ifndef ASM + +#include "error.h" + +int hipe_modeswitch_debug; + +void hipe_mode_switch_init(void); +void hipe_set_call_trap(Uint *bfun, void *nfun, int is_closure); +Process *hipe_mode_switch(Process*, unsigned, Eterm*); +void hipe_inc_nstack(Process *p); +void hipe_set_closure_stub(ErlFunEntry *fe, unsigned num_free); +Eterm hipe_build_stacktrace(Process *p, struct StackTrace *s); + +extern Uint hipe_beam_pc_return[]; +extern Uint hipe_beam_pc_throw[]; +extern Uint hipe_beam_pc_resume[]; + +#endif /* ASM */ + +#endif /* HIPE_MODE_SWITCH_H */ diff --git a/erts/emulator/hipe/hipe_native_bif.c b/erts/emulator/hipe/hipe_native_bif.c new file mode 100644 index 0000000000..f8c2502522 --- /dev/null +++ b/erts/emulator/hipe/hipe_native_bif.c @@ -0,0 +1,590 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. 
You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_native_bif.c + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "erl_process.h" +#include "error.h" +#include "bif.h" +#include "erl_bits.h" +#include "erl_binary.h" +#include "hipe_mode_switch.h" +#include "hipe_native_bif.h" +#include "hipe_arch.h" +#include "hipe_stack.h" + +/* + * These are wrappers for BIFs that may trigger a native + * stack walk with p->hipe.narity != 0. + */ + +/* for -Wmissing-prototypes :-( */ +extern Eterm hipe_check_process_code_2(Process*, Eterm, Eterm); +extern Eterm hipe_garbage_collect_1(Process*, Eterm); +extern Eterm hipe_show_nstack_1(Process*, Eterm); + +/* Used when a BIF can trigger a stack walk. */ +static __inline__ void hipe_set_narity(Process *p, unsigned int arity) +{ + p->hipe.narity = arity; +} + +Eterm hipe_check_process_code_2(BIF_ALIST_2) +{ + Eterm ret; + + hipe_set_narity(BIF_P, 2); + ret = check_process_code_2(BIF_P, BIF_ARG_1, BIF_ARG_2); + hipe_set_narity(BIF_P, 0); + return ret; +} + +Eterm hipe_garbage_collect_1(BIF_ALIST_1) +{ + Eterm ret; + + hipe_set_narity(BIF_P, 1); + ret = garbage_collect_1(BIF_P, BIF_ARG_1); + hipe_set_narity(BIF_P, 0); + return ret; +} + +Eterm hipe_show_nstack_1(BIF_ALIST_1) +{ + Eterm ret; + + hipe_set_narity(BIF_P, 1); + ret = hipe_bifs_show_nstack_1(BIF_P, BIF_ARG_1); + hipe_set_narity(BIF_P, 0); + return ret; +} + +/* + * This is called when inlined heap allocation in native code fails. + * The 'need' parameter is the number of heap words needed. 
+ * The value is tagged as a fixnum to avoid untagged data on + * the x86 stack while the gc is running. + */ +void hipe_gc(Process *p, Eterm need) +{ + hipe_set_narity(p, 1); + p->fcalls -= erts_garbage_collect(p, unsigned_val(need), NULL, 0); + hipe_set_narity(p, 0); +} + +/* This is like the OP_setTimeout JAM instruction. + * Transformation to the BEAM instruction wait_timeout_fs + * has begun. + * XXX: BUG: native code should check return status + */ +Eterm hipe_set_timeout(Process *p, Eterm timeout_value) +{ +#if !defined(ARCH_64) + Uint time_val; +#endif + /* XXX: This should be converted to follow BEAM conventions, + * but that requires some compiler changes. + * + * In BEAM, set_timeout saves TWO CP values, and suspends. + * p->def_arg_reg[0] and p->i are both defined and used. + * If a message arrives, BEAM resumes at p->i. + * If a timeout fires, BEAM resumes at p->def_arg_reg[0]. + * (See set_timer() and timeout_proc() in erl_process.c.) + * + * Here we set p->def_arg_reg[0] to hipe_beam_pc_resume. + * Assuming our caller invokes suspend immediately after + * our return, then hipe_mode_switch() will also set + * p->i to hipe_beam_pc_resume. Thus we'll resume in the same + * way regardless of the cause (message or timeout). + * hipe_mode_switch() checks for F_TIMO and returns a + * flag to native code indicating the cause. + */ + + /* + * def_arg_reg[0] is (re)set unconditionally, in case this is the + * 2nd/3rd/... iteration through the receive loop: in order to pass + * a boolean flag to native code indicating timeout or new message, + * our mode switch has to clobber def_arg_reg[0]. This is ok, but if + * we re-suspend (because we ignored a received message) we also have + * to reinitialise def_arg_reg[0] with the BEAM resume label. + * + * XXX: A better solution would be to pass two parameters to + * set_timeout: the timeout and the on-timeout resume label. 
+ * We could put the resume label in def_arg_reg[1] and resume + * at it without having to load a flag in a register and generate + * code to test it. Requires a HiPE compiler change though. + */ + p->def_arg_reg[0] = (Eterm) hipe_beam_pc_resume; + + /* + * If we have already set the timer, we must NOT set it again. Therefore, + * we must test the F_INSLPQUEUE flag as well as the F_TIMO flag. + */ + if (p->flags & (F_INSLPQUEUE | F_TIMO)) + return NIL; /* caller had better call nbif_suspend ASAP! */ + if (is_small(timeout_value) && signed_val(timeout_value) >= 0 && +#if defined(ARCH_64) + (unsigned_val(timeout_value) >> 32) == 0 +#else + 1 +#endif + ) { + set_timer(p, unsigned_val(timeout_value)); + } else if (timeout_value == am_infinity) { + /* p->flags |= F_TIMO; */ /* XXX: nbif_suspend_msg_timeout */ +#if !defined(ARCH_64) + } else if (term_to_Uint(timeout_value, &time_val)) { + set_timer(p, time_val); +#endif + } else { +#ifdef ERTS_SMP + if (p->hipe_smp.have_receive_locks) { + p->hipe_smp.have_receive_locks = 0; + erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_MSG_RECEIVE); + } +#endif + BIF_ERROR(p, EXC_TIMEOUT_VALUE); + } + return NIL; /* caller had better call nbif_suspend ASAP! */ +} + +/* This is like the remove_message BEAM instruction + */ +void hipe_select_msg(Process *p) +{ + ErlMessage *msgp; + + msgp = PEEK_MESSAGE(p); + UNLINK_MESSAGE(p, msgp); /* decrements global 'erts_proc_tot_mem' variable */ + JOIN_MESSAGE(p); + CANCEL_TIMER(p); /* calls erl_cancel_timer() */ + free_message(msgp); +} + +void hipe_fclearerror_error(Process *p) +{ +#if !defined(NO_FPE_SIGNALS) + erts_fp_check_init_error(&p->fp_exception); +#endif +} + +/* Saving a stacktrace from native mode. Right now, we only create a + * minimal struct with no fields filled in except freason. The flag + * EXF_NATIVE is set, so that build_stacktrace (in beam_emu.c) does not + * try to interpret any other field. 
+ */ +static void hipe_save_stacktrace(Process* c_p, Eterm args) +{ + Eterm *hp; + struct StackTrace* s; + int sz; + int depth = erts_backtrace_depth; /* max depth (never negative) */ + + /* Create a container for the exception data. This must be done just + as in the save_stacktrace function in beam_emu.c */ + sz = (offsetof(struct StackTrace, trace) + sizeof(Eterm)*depth + + sizeof(Eterm) - 1) / sizeof(Eterm); + hp = HAlloc(c_p, 2 + 1 + sz); + s = (struct StackTrace *) (hp + 2); + c_p->ftrace = CONS(hp, args, make_big((Eterm *) s)); + s->header = make_pos_bignum_header(sz); + s->current = NULL; + s->pc = NULL; + + s->depth = hipe_fill_stacktrace(c_p, depth, s->trace); + + /* Must mark this as a native-code exception. */ + s->freason = NATIVE_EXCEPTION(c_p->freason); + return; +} + +/* + * hipe_handle_exception() is called from hipe_${ARCH}_glue.S when an + * exception has been thrown, to expand the exception value, set the + * stack trace, and locate the current handler. + */ +void hipe_handle_exception(Process *c_p) +{ + Eterm Value = c_p->fvalue; + Eterm Args = am_true; + + ASSERT(c_p->freason != TRAP); /* Should have been handled earlier. */ + + if (c_p->mbuf) { + erts_printf("%s line %u: p==%p, p->mbuf==%p\n", __FUNCTION__, __LINE__, c_p, c_p->mbuf); + //erts_garbage_collect(c_p, 0, NULL, 0); + } + + /* + * Check if we have an arglist for the top level call. If so, this + * is encoded in Value, so we have to dig out the real Value as well + * as the Arglist. + */ + if (c_p->freason & EXF_ARGLIST) { + Eterm *tp; + ASSERT(is_tuple(Value)); + tp = tuple_val(Value); + Value = tp[1]; + Args = tp[2]; + } + + /* If necessary, build a stacktrace object. */ + if (c_p->freason & EXF_SAVETRACE) + hipe_save_stacktrace(c_p, Args); + + /* Get the fully expanded error term */ + Value = expand_error_value(c_p, c_p->freason, Value); + + /* Save final error term and stabilize the exception flags so no + further expansion is done. 
*/ + c_p->fvalue = Value; + c_p->freason = PRIMARY_EXCEPTION(c_p->freason); + + /* Synthesized to avoid having to generate code for it. */ + c_p->def_arg_reg[0] = exception_tag[GET_EXC_CLASS(c_p->freason)]; + + if (c_p->mbuf) { + //erts_printf("%s line %u: p==%p, p->mbuf==%p, p->lastbif==%p\n", __FUNCTION__, __LINE__, c_p, c_p->mbuf, c_p->hipe.lastbif); + erts_garbage_collect(c_p, 0, NULL, 0); + } + + hipe_find_handler(c_p); +} + +/* This is duplicated from beam_emu.c for now */ +static struct StackTrace *get_trace_from_exc(Eterm exc) +{ + if (exc == NIL) + return NULL; + else + return (struct StackTrace *) big_val(CDR(list_val(exc))); +} + +/* + * This does what the (misnamed) Beam instruction 'raise_ss' does, + * namely, a proper re-throw of an exception that was caught by 'try'. + */ +Eterm hipe_rethrow(Process *c_p, Eterm exc, Eterm value) +{ + c_p->fvalue = value; + if (c_p->freason == EXC_NULL) { + /* a safety check for the R10-0 case; should not happen */ + c_p->ftrace = NIL; + BIF_ERROR(c_p, EXC_ERROR); + } + /* For R10-0 code, 'exc' might be an atom. In that case, just + keep the existing c_p->ftrace. */ + switch (exc) { + case am_throw: + BIF_ERROR(c_p, (EXC_THROWN & ~EXF_SAVETRACE)); + break; + case am_error: + BIF_ERROR(c_p, (EXC_ERROR & ~EXF_SAVETRACE)); + break; + case am_exit: + BIF_ERROR(c_p, (EXC_EXIT & ~EXF_SAVETRACE)); + break; + default: + {/* R10-1 and later + XXX note: should do sanity check on given exception if it can be + passed from a user! Currently only expecting generated calls. + */ + struct StackTrace *s; + c_p->ftrace = exc; + s = get_trace_from_exc(exc); + if (s == NULL) { + BIF_ERROR(c_p, EXC_ERROR); + } else { + BIF_ERROR(c_p, PRIMARY_EXCEPTION(s->freason)); + } + } + } +} + +/* + * Support for compiled binary syntax operations. 
+ */ + +char *hipe_bs_allocate(int len) +{ + Binary *bptr; + + bptr = erts_bin_nrml_alloc(len); + bptr->flags = 0; + bptr->orig_size = len; + erts_smp_atomic_init(&bptr->refc, 1); + return bptr->orig_bytes; +} + +Binary *hipe_bs_reallocate(Binary* oldbptr, int newsize) +{ + Binary *bptr; + + bptr = erts_bin_realloc(oldbptr, newsize); + bptr->orig_size = newsize; + return bptr; +} + +int hipe_bs_put_big_integer( +#ifdef ERTS_SMP + Process *p, +#endif + Eterm arg, Uint num_bits, byte* base, unsigned offset, unsigned flags) +{ + byte *save_bin_buf; + Uint save_bin_offset; + int res; + ERL_BITS_DEFINE_STATEP(p); + + save_bin_buf = erts_current_bin; + save_bin_offset = erts_bin_offset; + erts_current_bin = base; + erts_bin_offset = offset; + res = erts_new_bs_put_integer(ERL_BITS_ARGS_3(arg, num_bits, flags)); + erts_current_bin = save_bin_buf; + erts_bin_offset = save_bin_offset; + return res; +} + +int hipe_bs_put_small_float( + Process *p, + Eterm arg, Uint num_bits, byte* base, unsigned offset, unsigned flags) +{ + byte *save_bin_buf; + Uint save_bin_offset; + int res; + ERL_BITS_DEFINE_STATEP(p); + + save_bin_buf = erts_current_bin; + save_bin_offset = erts_bin_offset; + erts_current_bin = base; + erts_bin_offset = offset; + res = erts_new_bs_put_float(p, arg, num_bits, flags); + erts_current_bin = save_bin_buf; + erts_bin_offset = save_bin_offset; + return res; +} + +void hipe_bs_put_bits( + Eterm arg, Uint num_bits, byte* base, unsigned offset, unsigned flags) +{ + Uint Bitoffs, Bitsize; + byte *Bytep; + + ERTS_GET_BINARY_BYTES(arg, Bytep, Bitoffs, Bitsize); + erts_copy_bits(Bytep, Bitoffs, 1, base, offset, 1, num_bits); +} + +Eterm hipe_bs_utf8_size(Eterm arg) +{ + /* See beam_emu.c:OpCase(i_bs_utf8_size_sd): error handling + is delayed to the subsequent put_utf8 operation. 
*/ + if (arg < make_small(0x80UL)) + return make_small(1); + else if (arg < make_small(0x800UL)) + return make_small(2); + else if (arg < make_small(0x10000UL)) + return make_small(3); + else + return make_small(4); +} + +Eterm hipe_bs_put_utf8(Process *p, Eterm arg, byte *base, unsigned int offset) +{ + byte *save_bin_buf; + Uint save_bin_offset; + int res; + unsigned int new_offset; + ERL_BITS_DEFINE_STATEP(p); + + save_bin_buf = erts_current_bin; + save_bin_offset = erts_bin_offset; + erts_current_bin = base; + erts_bin_offset = offset; + res = erts_bs_put_utf8(ERL_BITS_ARGS_1(arg)); + new_offset = erts_bin_offset; + erts_current_bin = save_bin_buf; + erts_bin_offset = save_bin_offset; + if (res == 0) + BIF_ERROR(p, BADARG); + return new_offset; +} + +Eterm hipe_bs_utf16_size(Eterm arg) +{ + /* See beam_emu.c:OpCase(i_bs_utf16_size_sd): error handling + is delayed to the subsequent put_utf16 operation. */ + if (arg >= make_small(0x10000UL)) + return make_small(4); + else + return make_small(2); +} + +/* This would have used standard_bif_interface_4, which doesn't exist. + * Instead we call it via wrappers for the two relevant cases: + * (flags & BSF_LITTLE) != 0 and (flags & BSF_LITTLE) == 0. 
+ */ +static +Eterm hipe_bs_put_utf16(Process *p, Eterm arg, byte *base, unsigned int offset, Uint flags) +{ + byte *save_bin_buf; + Uint save_bin_offset; + int res; + unsigned int new_offset; + ERL_BITS_DEFINE_STATEP(p); + + save_bin_buf = erts_current_bin; + save_bin_offset = erts_bin_offset; + erts_current_bin = base; + erts_bin_offset = offset; + res = erts_bs_put_utf16(ERL_BITS_ARGS_2(arg, flags)); + new_offset = erts_bin_offset; + erts_current_bin = save_bin_buf; + erts_bin_offset = save_bin_offset; + if (res == 0) + BIF_ERROR(p, BADARG); + return new_offset; +} + +Eterm hipe_bs_put_utf16be(Process *p, Eterm arg, byte *base, unsigned int offset) +{ + return hipe_bs_put_utf16(p, arg, base, offset, 0); +} + +Eterm hipe_bs_put_utf16le(Process *p, Eterm arg, byte *base, unsigned int offset) +{ + return hipe_bs_put_utf16(p, arg, base, offset, BSF_LITTLE); +} + +static int validate_unicode(Eterm arg) +{ + if (is_not_small(arg) || + arg > make_small(0x10FFFFUL) || + (make_small(0xD800UL) <= arg && arg <= make_small(0xDFFFUL)) || + arg == make_small(0xFFFEUL) || + arg == make_small(0xFFFFUL)) + return 0; + return 1; +} + +Eterm hipe_bs_validate_unicode(Process *p, Eterm arg) +{ + if (!validate_unicode(arg)) + BIF_ERROR(p, BADARG); + return NIL; +} + +int hipe_bs_validate_unicode_retract(ErlBinMatchBuffer* mb, Eterm arg) +{ + if (!validate_unicode(arg)) { + mb->offset -= 32; + return 0; + } + return 1; +} + +/* This is like the loop_rec_fr BEAM instruction + */ +Eterm hipe_check_get_msg(Process *c_p) +{ + Eterm ret; + ErlMessage *msgp; + + next_message: + + msgp = PEEK_MESSAGE(c_p); + + if (!msgp) { +#ifdef ERTS_SMP + erts_smp_proc_lock(c_p, ERTS_PROC_LOCKS_MSG_RECEIVE); + /* Make sure messages wont pass exit signals... 
*/ + if (ERTS_PROC_PENDING_EXIT(c_p)) { + erts_smp_proc_unlock(c_p, ERTS_PROC_LOCKS_MSG_RECEIVE); + return THE_NON_VALUE; /* Will be rescheduled for exit */ + } + ERTS_SMP_MSGQ_MV_INQ2PRIVQ(c_p); + msgp = PEEK_MESSAGE(c_p); + if (msgp) + erts_smp_proc_unlock(c_p, ERTS_PROC_LOCKS_MSG_RECEIVE); + else { + /* XXX: BEAM doesn't need this */ + c_p->hipe_smp.have_receive_locks = 1; +#endif + return THE_NON_VALUE; +#ifdef ERTS_SMP + } +#endif + } + ErtsMoveMsgAttachmentIntoProc(msgp, c_p, c_p->stop, HEAP_TOP(c_p), + c_p->fcalls, (void) 0, (void) 0); + ret = ERL_MESSAGE_TERM(msgp); + if (is_non_value(ret)) { + /* + * A corrupt distribution message that we weren't able to decode; + * remove it... + */ + ASSERT(!msgp->data.attached); + UNLINK_MESSAGE(c_p, msgp); + free_message(msgp); + goto next_message; + } + return ret; +} + +/* + * SMP-specific stuff + */ +#ifdef ERTS_SMP + +/* + * This is like the timeout BEAM instruction. + */ +void hipe_clear_timeout(Process *c_p) +{ + /* + * A timeout has occurred. Reset the save pointer so that the next + * receive statement will examine the first message first. + */ +#ifdef ERTS_SMP + /* XXX: BEAM has different entries for the locked and unlocked + cases. HiPE doesn't, so we must check dynamically. */ + if (c_p->hipe_smp.have_receive_locks) { + c_p->hipe_smp.have_receive_locks = 0; + erts_smp_proc_unlock(c_p, ERTS_PROC_LOCKS_MSG_RECEIVE); + } +#endif + if (IS_TRACED_FL(c_p, F_TRACE_RECEIVE)) { + trace_receive(c_p, am_timeout); + } + c_p->flags &= ~F_TIMO; + JOIN_MESSAGE(c_p); +} + +void hipe_atomic_inc(int *counter) +{ + erts_smp_atomic_inc((erts_smp_atomic_t*)counter); +} + +#endif diff --git a/erts/emulator/hipe/hipe_native_bif.h b/erts/emulator/hipe/hipe_native_bif.h new file mode 100644 index 0000000000..3b55b64a41 --- /dev/null +++ b/erts/emulator/hipe/hipe_native_bif.h @@ -0,0 +1,121 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. 
+ * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_native_bif.h + */ + +#ifndef HIPE_NATIVE_BIF_H +#define HIPE_NATIVE_BIF_H + +#include "hipe_arch.h" + +/* + * Prototypes for entry points used by native code. + */ +AEXTERN(Eterm,nbif_callemu,(void)); +AEXTERN(int,nbif_suspend_0,(void)); /* caller ignores retval */ +AEXTERN(int,nbif_suspend_msg,(void)); +AEXTERN(int,nbif_suspend_msg_timeout,(void)); + +AEXTERN(Eterm,nbif_rethrow,(Process*, Eterm, Eterm)); +AEXTERN(Eterm,nbif_set_timeout,(Process*, Eterm)); + +AEXTERN(Eterm,nbif_gc_1,(void)); + +AEXTERN(Eterm,nbif_apply,(void)); +AEXTERN(Eterm,nbif_find_na_or_make_stub,(void)); +AEXTERN(Eterm,nbif_nonclosure_address,(void)); + +AEXTERN(Eterm,nbif_add_2,(void)); +AEXTERN(Eterm,nbif_sub_2,(void)); +AEXTERN(Eterm,nbif_mul_2,(void)); + +AEXTERN(Eterm,nbif_conv_big_to_float,(void)); +AEXTERN(void,nbif_fclearerror_error,(Process*)); + +AEXTERN(int,nbif_bs_put_big_integer,(void)); +AEXTERN(int,nbif_bs_put_small_float,(void)); +AEXTERN(void,nbif_bs_put_bits,(void)); +AEXTERN(Eterm,nbif_bs_get_integer_2,(void)); +AEXTERN(Eterm,nbif_bs_get_float_2,(void)); +AEXTERN(Eterm,nbif_bs_get_binary_2,(void)); +AEXTERN(char*,nbif_bs_allocate,(void)); +AEXTERN(Binary*,nbif_bs_reallocate,(void)); +AEXTERN(Eterm,nbif_bs_utf8_size,(Eterm)); +AEXTERN(Eterm,nbif_bs_put_utf8,(Process*,Eterm,byte*,unsigned int)); +AEXTERN(Eterm,nbif_bs_get_utf8,(void)); 
+AEXTERN(Eterm,nbif_bs_utf16_size,(Eterm)); +AEXTERN(Eterm,nbif_bs_put_utf16be,(Process*,Eterm,byte*,unsigned int)); +AEXTERN(Eterm,nbif_bs_put_utf16le,(Process*,Eterm,byte*,unsigned int)); +AEXTERN(Eterm,nbif_bs_get_utf16,(void)); +AEXTERN(Eterm,nbif_bs_validate_unicode,(Process*,Eterm)); +AEXTERN(Eterm,nbif_bs_validate_unicode_retract,(void)); + +AEXTERN(void,nbif_select_msg,(Process*)); +AEXTERN(Eterm,nbif_cmp_2,(void)); +AEXTERN(Eterm,nbif_eq_2,(void)); + +Eterm hipe_nonclosure_address(Process*, Eterm, Uint); +Eterm hipe_conv_big_to_float(Process*, Eterm); +void hipe_fclearerror_error(Process*); +void hipe_select_msg(Process*); +void hipe_gc(Process*, Eterm); +Eterm hipe_set_timeout(Process*, Eterm); +void hipe_handle_exception(Process*); +Eterm hipe_rethrow(Process *c_p, Eterm exc, Eterm value); +char *hipe_bs_allocate(int); +Binary *hipe_bs_reallocate(Binary*, int); +int hipe_bs_put_small_float(Process*, Eterm, Uint, byte*, unsigned, unsigned); +void hipe_bs_put_bits(Eterm, Uint, byte*, unsigned, unsigned); +Eterm hipe_bs_utf8_size(Eterm); +Eterm hipe_bs_put_utf8(Process*, Eterm, byte*, unsigned int); +Eterm hipe_bs_utf16_size(Eterm); +Eterm hipe_bs_put_utf16be(Process*, Eterm, byte*, unsigned int); +Eterm hipe_bs_put_utf16le(Process*, Eterm, byte*, unsigned int); +Eterm hipe_bs_validate_unicode(Process*, Eterm); +struct erl_bin_match_buffer; +int hipe_bs_validate_unicode_retract(struct erl_bin_match_buffer*, Eterm); + +/* + * Stuff that is different in SMP and non-SMP. 
+ */ +#ifdef ERTS_SMP +int hipe_bs_put_big_integer(Process*, Eterm, Uint, byte*, unsigned, unsigned); +#else +int hipe_bs_put_big_integer(Eterm, Uint, byte*, unsigned, unsigned); +#endif + +AEXTERN(Eterm,nbif_check_get_msg,(Process*)); +Eterm hipe_check_get_msg(Process*); + +/* + * SMP-specific stuff + */ +#ifdef ERTS_SMP +AEXTERN(void,nbif_atomic_inc,(void)); +AEXTERN(void,nbif_clear_timeout,(Process*)); +void hipe_atomic_inc(int*); +void hipe_clear_timeout(Process*); +#endif + +#define BIF_LIST(M,F,A,C,I) AEXTERN(Eterm,nbif_##C,(void)); +#include "erl_bif_list.h" +#undef BIF_LIST + +#endif /* HIPE_NATIVE_BIF_H */ diff --git a/erts/emulator/hipe/hipe_ops.tab b/erts/emulator/hipe/hipe_ops.tab new file mode 100644 index 0000000000..eb6f824d1c --- /dev/null +++ b/erts/emulator/hipe/hipe_ops.tab @@ -0,0 +1,25 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +hipe_trap_call +hipe_trap_call_closure +hipe_trap_return +hipe_trap_throw +hipe_trap_resume +hipe_call_count diff --git a/erts/emulator/hipe/hipe_perfctr.c b/erts/emulator/hipe/hipe_perfctr.c new file mode 100644 index 0000000000..69bb648854 --- /dev/null +++ b/erts/emulator/hipe/hipe_perfctr.c @@ -0,0 +1,229 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. 
+ * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "sys.h" +#include "error.h" +#include "global.h" +#include "bif.h" +#include "big.h" +#include "erl_binary.h" +#include "hipe_perfctr.h" +#include "libperfctr.h" + +static struct vperfctr *vperfctr; +static unsigned int have_rdtsc; +static double tsc_to_ms; +static unsigned int tsc_on; /* control calls must set tsc_on if have_rdtsc is true */ +static unsigned int nractrs; +static unsigned int users; +#define USER_BIFS (1<<0) +#define USER_HRVTIME (1<<1) + +static int hipe_perfctr_open(unsigned int user) +{ + struct perfctr_info info; + + if (!vperfctr) { + vperfctr = vperfctr_open(); + if (!vperfctr) + return -1; + if (vperfctr_info(vperfctr, &info) >= 0) { + tsc_to_ms = (double)(info.tsc_to_cpu_mult ? : 1) / (double)info.cpu_khz; + have_rdtsc = (info.cpu_features & PERFCTR_FEATURE_RDTSC) ? 
1 : 0; + } + tsc_on = 0; + nractrs = 0; + } + users |= user; + return 0; +} + +static void hipe_perfctr_reset(void) +{ + struct vperfctr_control control; + + memset(&control, 0, sizeof control); + if (have_rdtsc) + control.cpu_control.tsc_on = 1; + nractrs = 0; + if (vperfctr_control(vperfctr, &control) >= 0) + tsc_on = 1; +} + +static void hipe_perfctr_close(unsigned int user) +{ + if (!vperfctr) + return; + users &= ~user; + switch (users) { + case 0: + vperfctr_unlink(vperfctr); + vperfctr_close(vperfctr); + vperfctr = NULL; + tsc_on = 0; + nractrs = 0; + break; + case USER_HRVTIME: + hipe_perfctr_reset(); + } +} + +/* + * Interface for HiPE's hrvtime code. + */ + +int hipe_perfctr_hrvtime_open(void) +{ + if (hipe_perfctr_open(USER_HRVTIME) < 0) + return -1; + if (have_rdtsc) { + if (!tsc_on) + hipe_perfctr_reset(); /* note: updates tsc_on */ + if (tsc_on) + return 0; + } + hipe_perfctr_hrvtime_close(); + return -1; +} + +void hipe_perfctr_hrvtime_close(void) +{ + hipe_perfctr_close(USER_HRVTIME); +} + +double hipe_perfctr_hrvtime_get(void) +{ + return (double)vperfctr_read_tsc(vperfctr) * tsc_to_ms; +} + +/* + * BIF interface for user-programmable performance counters. + */ + +BIF_RETTYPE hipe_bifs_vperfctr_open_0(BIF_ALIST_0) +{ + if (hipe_perfctr_open(USER_BIFS) < 0) + BIF_RET(am_false); /* arity 0 BIFs can't fail :-( */ + BIF_RET(am_true); +} + +BIF_RETTYPE hipe_bifs_vperfctr_close_0(BIF_ALIST_0) +{ + hipe_perfctr_close(USER_BIFS); + BIF_RET(NIL); +} + +static Eterm ull_to_integer(unsigned long long x, Process *p) +{ + unsigned long long tmpx; + unsigned int ds, i; + size_t sz; + Eterm *hp; + ErtsDigit *xp; + + if (x <= (unsigned long long)MAX_SMALL) + return make_small(x); + + /* Calculate number of digits. 
*/ + ds = 0; + tmpx = x; + do { + ++ds; + tmpx = (tmpx >> (D_EXP / 2)) >> (D_EXP / 2); + } while (tmpx != 0); + + sz = BIG_NEED_SIZE(ds); /* number of words including arity */ + hp = HAlloc(p, sz); + *hp = make_pos_bignum_header(sz-1); + + xp = (ErtsDigit*)(hp+1); + i = 0; + do { + xp[i++] = (ErtsDigit)x; + x = (x >> (D_EXP / 2)) >> (D_EXP / 2); + } while (i < ds); + while (i & (BIG_DIGITS_PER_WORD-1)) + xp[i++] = 0; + + return make_big(hp); +} + +BIF_RETTYPE hipe_bifs_vperfctr_info_0(BIF_ALIST_0) +{ + struct perfctr_info info; + + if (!vperfctr || vperfctr_info(vperfctr, &info) < 0) + BIF_RET(am_false); /* arity 0 BIFs can't fail :-( */ + BIF_RET(new_binary(BIF_P, (void*)&info, sizeof info)); +} + +BIF_RETTYPE hipe_bifs_vperfctr_read_tsc_0(BIF_ALIST_0) +{ + unsigned long long val; + + if (!vperfctr || !tsc_on) + BIF_RET(am_false); /* arity 0 BIFs can't fail :-( */ + val = vperfctr_read_tsc(vperfctr); + BIF_RET(ull_to_integer(val, BIF_P)); +} + +BIF_RETTYPE hipe_bifs_vperfctr_read_pmc_1(BIF_ALIST_1) +{ + Uint pmc; + unsigned long long val; + + if (!vperfctr || + is_not_small(BIF_ARG_1) || + (pmc = unsigned_val(BIF_ARG_1), pmc >= nractrs)) + BIF_RET(am_false); /* for consistency with the arity 0 BIFs */ + val = vperfctr_read_pmc(vperfctr, pmc); + BIF_RET(ull_to_integer(val, BIF_P)); +} + +BIF_RETTYPE hipe_bifs_vperfctr_control_1(BIF_ALIST_1) +{ + void *bytes; + struct vperfctr_control control; + Uint bitoffs; + Uint bitsize; + + if (!vperfctr) + BIF_ERROR(BIF_P, BADARG); + if (is_not_binary(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + if (binary_size(BIF_ARG_1) != sizeof control) + BIF_ERROR(BIF_P, BADARG); + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + ASSERT(bitoffs == 0); + ASSERT(bitsize == 0); + memcpy(&control, bytes, sizeof control); + if (have_rdtsc) + control.cpu_control.tsc_on = 1; + if (vperfctr_control(vperfctr, &control) < 0) { + hipe_perfctr_reset(); + BIF_ERROR(BIF_P, BADARG); + } + tsc_on = control.cpu_control.tsc_on; + nractrs = 
control.cpu_control.nractrs; + BIF_RET(NIL); +} diff --git a/erts/emulator/hipe/hipe_perfctr.h b/erts/emulator/hipe/hipe_perfctr.h new file mode 100644 index 0000000000..7b20c68cac --- /dev/null +++ b/erts/emulator/hipe/hipe_perfctr.h @@ -0,0 +1,24 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ + +extern int hipe_perfctr_hrvtime_open(void); +extern void hipe_perfctr_hrvtime_close(void); +extern double hipe_perfctr_hrvtime_get(void); diff --git a/erts/emulator/hipe/hipe_perfctr.tab b/erts/emulator/hipe/hipe_perfctr.tab new file mode 100644 index 0000000000..663522f85e --- /dev/null +++ b/erts/emulator/hipe/hipe_perfctr.tab @@ -0,0 +1,26 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2004-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. 
+# +# %CopyrightEnd% +# +# $Id$ + +bif hipe_bifs:vperfctr_open/0 +bif hipe_bifs:vperfctr_close/0 +bif hipe_bifs:vperfctr_info/0 +bif hipe_bifs:vperfctr_read_tsc/0 +bif hipe_bifs:vperfctr_read_pmc/1 +bif hipe_bifs:vperfctr_control/1 diff --git a/erts/emulator/hipe/hipe_ppc.c b/erts/emulator/hipe/hipe_ppc.c new file mode 100644 index 0000000000..3a0beedb68 --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc.c @@ -0,0 +1,487 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#include <stddef.h> /* offsetof() */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "global.h" +#include "erl_binary.h" +#include <sys/mman.h> + +#include "hipe_arch.h" +#include "hipe_native_bif.h" /* nbif_callemu() */ +#include "hipe_bif0.h" + +#if !defined(__powerpc64__) +const unsigned int fconv_constant[2] = { 0x43300000, 0x80000000 }; +#endif + +/* Flush dcache and invalidate icache for a range of addresses. 
*/ +void hipe_flush_icache_range(void *address, unsigned int nbytes) +{ + const unsigned int L1_CACHE_SHIFT = 5; + const unsigned long L1_CACHE_BYTES = 1 << L1_CACHE_SHIFT; + unsigned long start, p; + unsigned int nlines, n; + + if (!nbytes) + return; + + start = (unsigned long)address & ~(L1_CACHE_BYTES-1); + nlines = + (((unsigned long)address & (L1_CACHE_BYTES-1)) + + nbytes + + (L1_CACHE_BYTES-1)) >> L1_CACHE_SHIFT; + + p = start; + n = nlines; + do { + asm volatile("dcbst 0,%0" : : "r"(p) : "memory"); + p += L1_CACHE_BYTES; + } while (--n != 0); + asm volatile("sync"); + p = start; + n = nlines; + do { + asm volatile("icbi 0,%0" : : "r"(p) : "memory"); + p += L1_CACHE_BYTES; + } while (--n != 0); + asm volatile("sync\n\tisync"); +} + +/* + * Management of 32MB code segments for regular code and trampolines. + */ + +#define SEGMENT_NRBYTES (32*1024*1024) /* named constant, _not_ a tunable */ + +static struct segment { + unsigned int *base; /* [base,base+32MB[ */ + unsigned int *code_pos; /* INV: base <= code_pos <= tramp_pos */ + unsigned int *tramp_pos; /* INV: tramp_pos <= base+32MB */ +} curseg; + +#define in_area(ptr,start,nbytes) \ + ((unsigned long)((char*)(ptr) - (char*)(start)) < (nbytes)) + +/* Darwin breakage */ +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif + +#if defined(__powerpc64__) +static void *new_code_mapping(void) +{ + char *map_hint, *map_start; + + /* + * Allocate a new 32MB code segment in the low 2GB of the address space. + * + * This is problematic for several reasons: + * - Linux/ppc64 lacks the MAP_32BIT flag that Linux/x86-64 has. + * - The address space hint to mmap is only respected if that + * area is available. If it isn't, then mmap falls back to its + * defaults, which (according to testing) results in very high + * (and thus useless for us) addresses being returned. + * - Another mapping, presumably the brk, also occupies low addresses. 
+ * + * As initial implementation, simply start allocating at the 0.5GB + * boundary. This leaves plenty of space for the brk before malloc + * needs to switch to mmap, while allowing for 1.5GB of code. + * + * A more robust implementation would be to parse /proc/self/maps, + * reserve all available space between (say) 0.5GB and 2GB with + * PROT_NONE MAP_NORESERVE mappings, and then allocate by releasing + * 32MB segments and re-mapping them properly. This would work on + * Linux/ppc64, I have no idea how things should be done on Darwin64. + */ + if (curseg.base) + map_hint = (char*)curseg.base + SEGMENT_NRBYTES; + else + map_hint = (char*)(512*1024*1024); /* 0.5GB */ + map_start = mmap(map_hint, SEGMENT_NRBYTES, + PROT_EXEC|PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + if (map_start != MAP_FAILED && + (((unsigned long)map_start + (SEGMENT_NRBYTES-1)) & ~0x7FFFFFFFUL)) { + fprintf(stderr, "mmap with hint %p returned code memory %p\r\n", map_hint, map_start); + abort(); + } + return map_start; +} +#else +static void *new_code_mapping(void) +{ + return mmap(0, SEGMENT_NRBYTES, + PROT_EXEC|PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); +} +#endif + +static int check_callees(Eterm callees) +{ + Eterm *tuple; + Uint arity; + Uint i; + + if (is_not_tuple(callees)) + return -1; + tuple = tuple_val(callees); + arity = arityval(tuple[0]); + for (i = 1; i <= arity; ++i) { + Eterm mfa = tuple[i]; + if (is_not_tuple(mfa) || + tuple_val(mfa)[0] != make_arityval(3) || + is_not_atom(tuple_val(mfa)[1]) || + is_not_atom(tuple_val(mfa)[2]) || + is_not_small(tuple_val(mfa)[3]) || + unsigned_val(tuple_val(mfa)[3]) > 255) + return -1; + } + return arity; +} + +static unsigned int *try_alloc(Uint nrwords, int nrcallees, Eterm callees, unsigned int **trampvec) +{ + unsigned int *base, *address, *tramp_pos, nrfreewords; + int trampnr; + + tramp_pos = curseg.tramp_pos; + address = curseg.code_pos; + nrfreewords = tramp_pos - address; + if (nrwords > 
nrfreewords) + return NULL; + curseg.code_pos = address + nrwords; + nrfreewords -= nrwords; + + base = curseg.base; + for (trampnr = 1; trampnr <= nrcallees; ++trampnr) { + Eterm mfa = tuple_val(callees)[trampnr]; + Eterm m = tuple_val(mfa)[1]; + Eterm f = tuple_val(mfa)[2]; + unsigned int a = unsigned_val(tuple_val(mfa)[3]); + unsigned int *trampoline = hipe_mfa_get_trampoline(m, f, a); + if (!in_area(trampoline, base, SEGMENT_NRBYTES)) { + if (nrfreewords < 4) + return NULL; + nrfreewords -= 4; + tramp_pos = trampoline = tramp_pos - 4; +#if defined(__powerpc64__) + trampoline[0] = 0x3D600000; /* addis r11,0,0 */ + trampoline[1] = 0x616B0000; /* ori r11,r11,0 */ +#else + trampoline[0] = 0x39600000; /* addi r11,r0,0 */ + trampoline[1] = 0x3D6B0000; /* addis r11,r11,0 */ +#endif + trampoline[2] = 0x7D6903A6; /* mtctr r11 */ + trampoline[3] = 0x4E800420; /* bctr */ + hipe_flush_icache_range(trampoline, 4*sizeof(int)); + hipe_mfa_set_trampoline(m, f, a, trampoline); + } + trampvec[trampnr-1] = trampoline; + } + curseg.tramp_pos = tramp_pos; + return address; +} + +void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) +{ + Uint nrwords; + int nrcallees; + Eterm trampvecbin; + unsigned int **trampvec; + unsigned int *address; + unsigned int *base; + struct segment oldseg; + + if (nrbytes & 0x3) + return NULL; + nrwords = nrbytes >> 2; + + nrcallees = check_callees(callees); + if (nrcallees < 0) + return NULL; + trampvecbin = new_binary(p, NULL, nrcallees*sizeof(unsigned int*)); + trampvec = (unsigned int**)binary_bytes(trampvecbin); + + address = try_alloc(nrwords, nrcallees, callees, trampvec); + if (!address) { + base = new_code_mapping(); + if (base == MAP_FAILED) + return NULL; + oldseg = curseg; + curseg.base = base; + curseg.code_pos = base; + curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES); + + address = try_alloc(nrwords, nrcallees, callees, trampvec); + if (!address) { + munmap(base, SEGMENT_NRBYTES); + curseg = 
oldseg; + return NULL; + } + /* commit to new segment, ignore leftover space in old segment */ + } + *trampolines = trampvecbin; + return address; +} + +static unsigned int *alloc_stub(Uint nrwords) +{ + unsigned int *address; + unsigned int *base; + struct segment oldseg; + + address = try_alloc(nrwords, 0, NIL, NULL); + if (!address) { + base = new_code_mapping(); + if (base == MAP_FAILED) + return NULL; + oldseg = curseg; + curseg.base = base; + curseg.code_pos = base; + curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES); + + address = try_alloc(nrwords, 0, NIL, NULL); + if (!address) { + munmap(base, SEGMENT_NRBYTES); + curseg = oldseg; + return NULL; + } + /* commit to new segment, ignore leftover space in old segment */ + } + return address; +} + +static void patch_imm16(Uint32 *address, unsigned int imm16) +{ + unsigned int insn = *address; + *address = (insn & ~0xFFFF) | (imm16 & 0xFFFF); + hipe_flush_icache_word(address); +} + +#if defined(__powerpc64__) +static void patch_li64(Uint32 *address, Uint64 value) +{ + patch_imm16(address+0, value >> 48);/* addis r,0,value@highest */ + patch_imm16(address+1, value >> 32);/* ori r,r,value@higher */ + /* sldi r,r,32 */ + patch_imm16(address+3, value >> 16);/* oris r,r,value@h */ + patch_imm16(address+4, value); /* ori r,r,value@l */ +} + +static int patch_li31(Uint32 *address, Uint32 value) +{ + if ((value >> 31) != 0) + return -1; + patch_imm16(address, value >> 16); /* addis r,0,value@h */ + patch_imm16(address+1, value); /* ori r,r,value@l */ +} + +void hipe_patch_load_fe(Uint *address, Uint value) +{ + patch_li64((Uint32*)address, value); +} + +int hipe_patch_insn(void *address, Uint64 value, Eterm type) +{ + switch (type) { + case am_closure: + case am_constant: + patch_li64((Uint32*)address, value); + return 0; + case am_atom: + case am_c_const: + return patch_li31((Uint32*)address, value); + default: + return -1; + } +} + +void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity) 
+{ + unsigned int *code; + + if ((unsigned long)&nbif_callemu & ~0x01FFFFFCUL) + abort(); + + code = alloc_stub(7); + + /* addis r12,0,beamAddress@highest */ + code[0] = 0x3d800000 | (((unsigned long)beamAddress >> 48) & 0xffff); + /* ori r12,r12,beamAddress@higher */ + code[1] = 0x618c0000 | (((unsigned long)beamAddress >> 32) & 0xffff); + /* sldi r12,r12,32 (rldicr r12,r12,32,31) */ + code[2] = 0x798c07c6; + /* oris r12,r12,beamAddress@h */ + code[3] = 0x658c0000 | (((unsigned long)beamAddress >> 16) & 0xffff); + /* ori r12,r12,beamAddress@l */ + code[4] = 0x618c0000 | ((unsigned long)beamAddress & 0xffff); + /* addi r0,0,beamArity */ + code[5] = 0x38000000 | (beamArity & 0x7FFF); + /* ba nbif_callemu */ + code[6] = 0x48000002 | (unsigned long)&nbif_callemu; + + hipe_flush_icache_range(code, 7*sizeof(int)); + + return code; +} +#else /* !__powerpc64__ */ +/* + * To load a 32-bit immediate value 'val' into Rd (Rd != R0): + * + * addi Rd, 0, val@l // val & 0xFFFF + * addis Rd, Rd, val@ha // ((val + 0x8000) >> 16) & 0xFFFF + * + * The first addi sign-extends the low 16 bits, so if + * val&(1<<15), the high portion of Rd will be -1 not 0. + * val@ha compensates by adding 1 if val&(1<<15). 
+ */ +static unsigned int at_ha(unsigned int val) +{ + return ((val + 0x8000) >> 16) & 0xFFFF; +} + +static void patch_li(Uint32 *address, Uint32 value) +{ + patch_imm16(address, value); + patch_imm16(address+1, at_ha(value)); +} + +void hipe_patch_load_fe(Uint32 *address, Uint value) +{ + patch_li(address, value); +} + +int hipe_patch_insn(void *address, Uint32 value, Eterm type) +{ + switch (type) { + case am_closure: + case am_constant: + case am_atom: + case am_c_const: + break; + default: + return -1; + } + patch_li((Uint32*)address, value); + return 0; +} + +/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2() + and hipe_bif0.c:hipe_make_stub() */ +void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity) +{ + unsigned int *code; + + /* + * Native code calls BEAM via a stub looking as follows: + * + * addi r12,0,beamAddress@l + * addi r0,0,beamArity + * addis r12,r12,beamAddress@ha + * ba nbif_callemu + * + * I'm using r0 and r12 since the standard SVR4 ABI allows + * them to be modified during function linkage. Trampolines + * (for b/bl to distant targets) may modify r11. + * + * The runtime system code is linked completely below the + * 32MB address boundary. Hence the branch to nbif_callemu + * is done with a 'ba' instruction. 
+ */ + + /* verify that 'ba' can reach nbif_callemu */ + if ((unsigned long)&nbif_callemu & ~0x01FFFFFCUL) + abort(); + + code = alloc_stub(4); + + /* addi r12,0,beamAddress@l */ + code[0] = 0x39800000 | ((unsigned long)beamAddress & 0xFFFF); + /* addi r0,0,beamArity */ + code[1] = 0x38000000 | (beamArity & 0x7FFF); + /* addis r12,r12,beamAddress@ha */ + code[2] = 0x3D8C0000 | at_ha((unsigned long)beamAddress); + /* ba nbif_callemu */ + code[3] = 0x48000002 | (unsigned long)&nbif_callemu; + + hipe_flush_icache_range(code, 4*sizeof(int)); + + return code; +} +#endif /* !__powerpc64__ */ + +static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA) +{ + Uint32 oldI = *address; + Uint32 newI = (oldI & 0xFC000001) | ((offset & 0x00FFFFFF) << 2) | (AA & 2); + *address = newI; + hipe_flush_icache_word(address); +} + +int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) +{ + if ((Uint32)destAddress == ((Uint32)destAddress & 0x01FFFFFC)) { + /* The destination is in the [0,32MB[ range. + We can reach it with a ba/bla instruction. + This is the typical case for BIFs and primops. + It's also common for trap-to-BEAM stubs (on ppc32). */ + patch_b((Uint32*)callAddress, (Uint32)destAddress >> 2, 2); + } else { + Sint32 destOffset = ((Sint32)destAddress - (Sint32)callAddress) >> 2; + if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) { + /* The destination is within a [-32MB,+32MB[ range from us. + We can reach it with a b/bl instruction. + This is typical for nearby Erlang code. */ + patch_b((Uint32*)callAddress, destOffset, 0); + } else { + /* The destination is too distant for b/bl/ba/bla. + Must do a b/bl to the trampoline. */ + Sint32 trampOffset = ((Sint32)trampoline - (Sint32)callAddress) >> 2; + if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) { + /* Update the trampoline's address computation. + (May be redundant, but we can't tell.) */ +#if defined(__powerpc64__) + /* This relies on the fact that we allocate code below 2GB. 
*/ + patch_li31((Uint32*)trampoline, (Uint32)destAddress); +#else + patch_li((Uint32*)trampoline, (Uint32)destAddress); +#endif + /* Update this call site. */ + patch_b((Uint32*)callAddress, trampOffset, 0); + } else + return -1; + } + } + return 0; +} + +/* Print the nra and narity fields of the process state p, one table row per field: the byte offset of the field within struct hipe_process_state, its label, and its value in hexadecimal padded to twice sizeof(long) digits. The last column is printed empty. */ +void hipe_arch_print_pcb(struct hipe_process_state *p) +{ +#define U(n,x) \ + printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "") + U("nra ", nra); + U("narity ", narity); +#undef U +} diff --git a/erts/emulator/hipe/hipe_ppc.h b/erts/emulator/hipe/hipe_ppc.h new file mode 100644 index 0000000000..e30ce30ed2 --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc.h @@ -0,0 +1,67 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_PPC_H +#define HIPE_PPC_H + +/* Make the word just written at address visible to instruction fetch: dcbst writes the data cache block back to memory and icbi invalidates the matching instruction cache block, with sync/isync ordering the steps. The "memory" clobber keeps the compiler from moving memory accesses across the sequence. */ +static __inline__ void hipe_flush_icache_word(void *address) +{ + asm volatile("dcbst 0,%0\n" + "\tsync\n" + "\ticbi 0,%0\n" + "\tsync\n" + "\tisync" + : + : "r"(address) + : "memory"); +} + +extern void hipe_flush_icache_range(void *address, unsigned int nbytes); + +/* for stack descriptor hash lookup */ +#define HIPE_RA_LSR_COUNT 2 /* low 2 bits are always zero */ + +/* for hipe_bifs_{read,write}_{s,u}32 */ +static __inline__ int hipe_word32_address_ok(void *address) +{ + /* nonzero iff the two low address bits are clear, i.e. address is 4-byte aligned */ + return ((unsigned long)address & 0x3) == 0; +} + +/* Native stack growth direction. */ +#define HIPE_NSTACK_GROWS_DOWN + +#if defined(__powerpc64__) +#define hipe_arch_name am_ppc64 +#define AEXTERN(RET,NAME,PROTO) extern const int NAME +AEXTERN(void,hipe_ppc_inc_stack,(void)); +#else +#define hipe_arch_name am_powerpc +extern void hipe_ppc_inc_stack(void); /* we don't have the AEXTERN() fallback :-( */ +#endif + +/* for hipe_bifs_enter_code_2 */ +extern void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p); +#define HIPE_ALLOC_CODE(n,c,t,p) hipe_alloc_code((n),(c),(t),(p)) + +#if !defined(__powerpc64__) +extern const unsigned int fconv_constant[]; +#endif + +#endif /* HIPE_PPC_H */ diff --git a/erts/emulator/hipe/hipe_ppc.tab b/erts/emulator/hipe/hipe_ppc.tab new file mode 100644 index 0000000000..a32dd820e7 --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc.tab @@ -0,0 +1,24 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2004-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. 
+# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +# $Id$ +# PowerPC-specific atoms + +atom fconv_constant +atom inc_stack_0 +atom powerpc diff --git a/erts/emulator/hipe/hipe_ppc64.tab b/erts/emulator/hipe/hipe_ppc64.tab new file mode 100644 index 0000000000..513182721c --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc64.tab @@ -0,0 +1,23 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2005-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +# $Id$ +# PPC64-specific atoms + +atom inc_stack_0 +atom ppc64 diff --git a/erts/emulator/hipe/hipe_ppc_abi.txt b/erts/emulator/hipe/hipe_ppc_abi.txt new file mode 100644 index 0000000000..4bf41e02b2 --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc_abi.txt @@ -0,0 +1,138 @@ + + %CopyrightBegin% + %CopyrightEnd% + +$Id$ + +HiPE PowerPC ABI +================ +This document describes aspects of HiPE's runtime system +that are specific for the 32 and 64-bit PowerPC architectures. + +Register Usage +-------------- +r1, r2, and r13 are reserved for the C runtime system. + +r29-r31 are fixed (unallocatable). +r29 (HP) is the current process' heap pointer. +r30 (NSP) is the current process' native stack pointer. 
+r31 (P) is the current process' "Process" pointer. + +r0, r3-r12, and r14-r28 are caller-save. They are used as temporary +scratch registers and for function call parameters and results. + +The runtime system uses temporaries in specific contexts: +r28 (TEMP_LR) is used to preserve LR around BIF calls. +r27 (TEMP_ARG0) is used to preserve the return value in nbif_stack_trap_ra, +and LR in hipe_ppc_inc_stack (the caller saved its LR in TEMP_LR). +r12 is used to pass the callee address in native-to-BEAM traps. +r11 is used to construct callee addresses in calls via trampolines. +r4 (ARG0) is used for MBUF-after-BIF checks, for storing the +arity of a BIF that throws an exception or does GC due to MBUF, +and for checking P->flags for pending timeout. +r3 is used to inspect the type of a thrown exception, and to +return a result token from glue.S back to hipe_mode_switch(). +r0 is used to pass the callee arity in native-to-BEAM traps. + +LR and CTR are caller-save. + +Calling Convention +------------------ +The first NR_ARG_REGS parameters (a tunable parameter between 0 and 7, +inclusive) are passed in r4-r10. + +r3 is not used for parameter passing. This allows the BIF wrappers to +simply move P to r3 without shifting the remaining parameter registers. + +r0/r11/r12 are not used for parameter passing since they may be modified +during function linkage. + +The return value from a function is placed in r3. + +Stack Frame Layout +------------------ +[From top to bottom: formals in left-to-right order, incoming return +address, fixed-size chunk for locals & spills, variable-size area +for actuals, outgoing return address. NSP normally points at the +bottom of the fixed-size chunk, except during a recursive call. +The callee pops the actuals, so no NSP adjustment at return.] + +Stack Descriptors +----------------- +sdesc_fsize() is the frame size excluding the return address word. 
+ +Standard Linux PowerPC Calling Conventions (32-bit) +=================================================== + +Reg Status Role +--- ------ ---- +r0 calleR-save volatile + may be modified during function linkage + r0 cannot be base reg in load/store insns +r1 calleE-save stack pointer, 16-byte aligned, must point + to valid frame with link to previous frame +r2 reserved thread register + (TOC in AIX, GOT in 64-bit, caller-save in OSX) +r3-r4 calleR-save volatile, parameters, return values +r5-r10 calleR-save volatile, parameters +r11 calleR-save volatile, + may be modified during function linkage + (calls by pointer & environment pointer in AIX) +r12 calleR-save volatile, + may be modified during function linkage +r13 reserved small data area pointer + (callee-save in AIX, thread reg in 64-bit, + callee-save in OSX) +r14-r30 calleE-save local variables +r31 calleE-save local variable or "environment pointer" +f0 calleR-save volatile +f1 calleR-save volatile, parameters, return values +f2-f8 calleR-save volatile, parameters +f9-f13 calleR-save volatile +f14-f31 calleE-save local variables +CR0/1/5/6/7 calleR-save condition codes, CR1 used in stdarg calls +CR2/3/4 calleE-save condition codes +LR calleR-save return address +CTR calleR-save counter, indirect jump address +XER calleR-save fixed-point exception register + +Standard PPC64 ELF ABI Calling Conventions +========================================== + +Reg Status Role +--- ------ ---- +r0 calleR-save volatile + may be modified during function linkage + r0 cannot be base reg in load/store insns +r1 calleE-save stack pointer, 16-byte aligned, must point + to valid frame with link to previous frame +r2 reserved TOC pointer +r3 calleR-save volatile, parameters, return values +r4-r10 calleR-save volatile, parameters +r11 calleR-save volatile, + may be modified during function linkage + (calls by pointer & environment pointer) +r12 calleR-save volatile, + may be modified during function linkage + (exception handling 
and glink code) +r13 reserved system thread ID +r14-r31 calleE-save local variables + +f0 calleR-save volatile +f1-f4 calleR-save volatile, parameters, return values +f5-f13 calleR-save volatile, parameters +f14-f31 calleE-save local variables + +CR0/1/5/6/7 calleR-save volatile condition codes +CR2/3/4 calleE-save non-volatile condition codes + +LR calleR-save return address, volatile +CTR calleR-save counter, indirect jump address (volatile) +XER calleR-save fixed-point exception register (volatile) +FPSCR calleR-save floating-point status and control (volatile) + +v0-v1 calleR-save volatile, scratch +v2-v13 calleR-save volatile, parameters +v14-v19 calleR-save volatile, scratch +v20-v31 calleE-save non-volatile local variables +vrsave calleE-save non-volatile diff --git a/erts/emulator/hipe/hipe_ppc_asm.m4 b/erts/emulator/hipe/hipe_ppc_asm.m4 new file mode 100644 index 0000000000..a0f8b78679 --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc_asm.m4 @@ -0,0 +1,286 @@ +changecom(`/*', `*/')dnl +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * $Id$ + */ +`#ifndef HIPE_PPC_ASM_H +#define HIPE_PPC_ASM_H' + +/* + * Handle 32 vs 64-bit. 
+ */ +ifelse(ARCH,ppc64,` +/* 64-bit PowerPC */ +define(LOAD,ld)dnl +define(STORE,std)dnl +define(CMPI,cmpdi)dnl +define(WSIZE,8)dnl +',` +/* 32-bit PowerPC */ +define(LOAD,lwz)dnl +define(STORE,stw)dnl +define(CMPI,cmpwi)dnl +define(WSIZE,4)dnl +')dnl +`#define LOAD 'LOAD +`#define STORE 'STORE +`#define CMPI 'CMPI + +/* + * Tunables. + */ +define(LEAF_WORDS,16)dnl number of stack words for leaf functions +define(NR_ARG_REGS,4)dnl admissible values are 0 to 6, inclusive + +`#define PPC_LEAF_WORDS 'LEAF_WORDS + +/* + * Workarounds for Darwin. + */ +ifelse(OPSYS,darwin,`` +/* Darwin */ +#define JOIN(X,Y) X##Y +#define CSYM(NAME) JOIN(_,NAME) +#define ASYM(NAME) CSYM(NAME) +#define GLOBAL(NAME) .globl NAME +#define SEMI @ +#define SET_SIZE(NAME) /*empty*/ +#define TYPE_FUNCTION(NAME) /*empty*/ +'',`` +/* Not Darwin */'' +`ifelse(ARCH,ppc64,`` +/* 64-bit */ +#define JOIN(X,Y) X##Y +#define CSYM(NAME) JOIN(.,NAME) +'',`` +/* 32-bit */ +#define CSYM(NAME) NAME +'')' +``#define ASYM(NAME) NAME +#define GLOBAL(NAME) .global NAME +#define SEMI ; +#define SET_SIZE(NAME) .size NAME,.-NAME +#define TYPE_FUNCTION(NAME) .type NAME,@function +#define lo16(X) X@l +#define ha16(X) X@ha + +/* + * Standard register names. + */ +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 +'')dnl + +/* + * Reserved registers. + */ +`#define P r31' +`#define NSP r30' +`#define HP r29' +`#define TEMP_LR r28' + +/* + * Context switching macros. + * + * RESTORE_CONTEXT and RESTORE_CONTEXT_QUICK do not affect + * the condition register. 
+ */ +/* QUICK variants: only the link register is preserved, in TEMP_LR. */ +`#define SAVE_CONTEXT_QUICK \ + mflr TEMP_LR' + +`#define RESTORE_CONTEXT_QUICK \ + mtlr TEMP_LR' + +/* CACHED_STATE variants: store HP and NSP to, or reload them from, the P_HP and P_NSP slots addressed through P. */ +`#define SAVE_CACHED_STATE \ + STORE HP, P_HP(P) SEMI\ + STORE NSP, P_NSP(P)' + +`#define RESTORE_CACHED_STATE \ + LOAD HP, P_HP(P) SEMI\ + LOAD NSP, P_NSP(P)' + +/* BIF variants: the save side copies LR to TEMP_LR and stores HP to P_HP(P); the restore side moves TEMP_LR back to LR and reloads HP. NSP is not stored or reloaded. */ +`#define SAVE_CONTEXT_BIF \ + mflr TEMP_LR SEMI \ + STORE HP, P_HP(P)' + +`#define RESTORE_CONTEXT_BIF \ + mtlr TEMP_LR SEMI \ + LOAD HP, P_HP(P)' + +/* GC variants: the save side does what the BIF variant does and additionally stores the return address to P_NRA(P) and NSP to P_NSP(P). The restore side is the same as the BIF one: only LR and HP are restored. */ +`#define SAVE_CONTEXT_GC \ + mflr TEMP_LR SEMI \ + STORE TEMP_LR, P_NRA(P) SEMI \ + STORE NSP, P_NSP(P) SEMI \ + STORE HP, P_HP(P)' + +`#define RESTORE_CONTEXT_GC \ + mtlr TEMP_LR SEMI \ + LOAD HP, P_HP(P)' + +/* + * Argument (parameter) registers. + */ +`#define PPC_NR_ARG_REGS 'NR_ARG_REGS +`#define NR_ARG_REGS 'NR_ARG_REGS + +define(defarg,`define(ARG$1,`$2')dnl +#`define ARG'$1 $2' +)dnl + +ifelse(eval(NR_ARG_REGS >= 1),0,, +`defarg(0,`r4')')dnl +ifelse(eval(NR_ARG_REGS >= 2),0,, +`defarg(1,`r5')')dnl +ifelse(eval(NR_ARG_REGS >= 3),0,, +`defarg(2,`r6')')dnl +ifelse(eval(NR_ARG_REGS >= 4),0,, +`defarg(3,`r7')')dnl +ifelse(eval(NR_ARG_REGS >= 5),0,, +`defarg(4,`r8')')dnl +ifelse(eval(NR_ARG_REGS >= 6),0,, +`defarg(5,`r9')')dnl + +/* + * TEMP_ARG0: + * Used in nbif_stack_trap_ra to preserve the return value. + * Must be a C callee-save register. + * Must be otherwise unused in the return path. + * + * TEMP_ARG0: + * Used in hipe_ppc_inc_stack to preserve the return address + * (TEMP_LR contains the caller's saved return address). + * Must be a C callee-save register. + * Must be otherwise unused in the call path. 
+ */ +`#define TEMP_ARG0 r27' + +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +dnl X X +dnl X hipe_ppc_glue.S support X +dnl X X +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + +dnl +dnl LOAD_ARG_REGS +dnl +define(LAR_1,`LOAD ARG$1, P_ARG$1(P) SEMI ')dnl +define(LAR_N,`ifelse(eval($1 >= 0),0,,`LAR_N(eval($1-1))LAR_1($1)')')dnl +define(LOAD_ARG_REGS,`LAR_N(eval(NR_ARG_REGS-1))')dnl +`#define LOAD_ARG_REGS 'LOAD_ARG_REGS + +dnl +dnl STORE_ARG_REGS +dnl +define(SAR_1,`STORE ARG$1, P_ARG$1(P) SEMI ')dnl +define(SAR_N,`ifelse(eval($1 >= 0),0,,`SAR_N(eval($1-1))SAR_1($1)')')dnl +define(STORE_ARG_REGS,`SAR_N(eval(NR_ARG_REGS-1))')dnl +`#define STORE_ARG_REGS 'STORE_ARG_REGS + +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +dnl X X +dnl X hipe_ppc_bifs.m4 support X +dnl X X +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + +dnl +dnl NBIF_ARG(DST,ARITY,ARGNO) +dnl Access a formal parameter. +dnl It will be a memory load via NSP when ARGNO >= NR_ARG_REGS. +dnl It will be a register move when 0 <= ARGNO < NR_ARG_REGS; if +dnl the source and destination are the same, the move is suppressed. 
+dnl +define(NBIF_MOVE_REG,`ifelse($1,$2,`# mr $1, $2',`mr $1, $2')')dnl +define(NBIF_REG_ARG,`NBIF_MOVE_REG($1,ARG$2)')dnl +define(NBIF_STK_LOAD,`LOAD $1, $2(NSP)')dnl +define(NBIF_STK_ARG,`NBIF_STK_LOAD($1,eval(WSIZE*(($2-$3)-1)))')dnl +define(NBIF_ARG,`ifelse(eval($3 >= NR_ARG_REGS),0,`NBIF_REG_ARG($1,$3)',`NBIF_STK_ARG($1,$2,$3)')')dnl +`/* #define NBIF_ARG_1_0 'NBIF_ARG(r3,1,0)` */' +`/* #define NBIF_ARG_2_0 'NBIF_ARG(r3,2,0)` */' +`/* #define NBIF_ARG_2_1 'NBIF_ARG(r3,2,1)` */' +`/* #define NBIF_ARG_3_0 'NBIF_ARG(r3,3,0)` */' +`/* #define NBIF_ARG_3_1 'NBIF_ARG(r3,3,1)` */' +`/* #define NBIF_ARG_3_2 'NBIF_ARG(r3,3,2)` */' +`/* #define NBIF_ARG_5_0 'NBIF_ARG(r3,5,0)` */' +`/* #define NBIF_ARG_5_1 'NBIF_ARG(r3,5,1)` */' +`/* #define NBIF_ARG_5_2 'NBIF_ARG(r3,5,2)` */' +`/* #define NBIF_ARG_5_3 'NBIF_ARG(r3,5,3)` */' +`/* #define NBIF_ARG_5_4 'NBIF_ARG(r3,5,4)` */' + +dnl +dnl NBIF_RET(ARITY) +dnl Generates a return from a native BIF, taking care to pop +dnl any stacked formal parameters. +dnl +define(NSP_RETN,`addi NSP, NSP, $1 + blr')dnl +define(NSP_RET0,`blr')dnl +define(RET_POP,`ifelse(eval($1 > NR_ARG_REGS),0,0,eval(WSIZE*($1 - NR_ARG_REGS)))')dnl +define(NBIF_RET_N,`ifelse(eval($1),0,`NSP_RET0',`NSP_RETN($1)')')dnl +define(NBIF_RET,`NBIF_RET_N(eval(RET_POP($1)))')dnl +`/* #define NBIF_RET_0 'NBIF_RET(0)` */' +`/* #define NBIF_RET_1 'NBIF_RET(1)` */' +`/* #define NBIF_RET_2 'NBIF_RET(2)` */' +`/* #define NBIF_RET_3 'NBIF_RET(3)` */' +`/* #define NBIF_RET_5 'NBIF_RET(5)` */' + +dnl +dnl QUICK_CALL_RET(CFUN,ARITY) +dnl Used in nocons_nofail and noproc primop interfaces to optimise +dnl SAVE_CONTEXT_QUICK; bl CFUN; RESTORE_CONTEXT_QUICK; NBIF_RET(ARITY). 
+dnl +define(NBIF_POP_N,`ifelse(eval($1),0,`',`addi NSP, NSP, $1 SEMI ')')dnl +define(QUICK_CALL_RET,`NBIF_POP_N(eval(RET_POP($2)))b $1')dnl +`/* #define QUICK_CALL_RET_F_0 'QUICK_CALL_RET(F,0)` */' +`/* #define QUICK_CALL_RET_F_1 'QUICK_CALL_RET(F,1)` */' +`/* #define QUICK_CALL_RET_F_2 'QUICK_CALL_RET(F,2)` */' +`/* #define QUICK_CALL_RET_F_3 'QUICK_CALL_RET(F,3)` */' +`/* #define QUICK_CALL_RET_F_5 'QUICK_CALL_RET(F,5)` */' + +`#endif /* HIPE_PPC_ASM_H */' diff --git a/erts/emulator/hipe/hipe_ppc_bifs.m4 b/erts/emulator/hipe/hipe_ppc_bifs.m4 new file mode 100644 index 0000000000..3849d9113a --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc_bifs.m4 @@ -0,0 +1,568 @@ +changecom(`/*', `*/')dnl +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * $Id$ + */ + +include(`hipe/hipe_ppc_asm.m4') +#`include' "hipe_literals.h" + + .text + .p2align 2 + +`#define TEST_GOT_MBUF LOAD r4, P_MBUF(P) SEMI CMPI r4, 0 SEMI bne- 3f SEMI 2: +#define JOIN3(A,B,C) A##B##C +#define HANDLE_GOT_MBUF(ARITY) 3: bl CSYM(JOIN3(nbif_,ARITY,_gc_after_bif)) SEMI b 2b' + +/* + * standard_bif_interface_1(nbif_name, cbif_name) + * standard_bif_interface_2(nbif_name, cbif_name) + * standard_bif_interface_3(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 1-3 parameters and + * standard failure mode. 
+ */ +define(standard_bif_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + CMPI r3, THE_NON_VALUE + RESTORE_CONTEXT_BIF + beq- 1f + NBIF_RET(1) +1: /* workaround for bc:s small offset operand */ + b CSYM(nbif_1_simple_exception) + HANDLE_GOT_MBUF(1) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(standard_bif_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,2,0) + NBIF_ARG(r5,2,1) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + CMPI r3, THE_NON_VALUE + RESTORE_CONTEXT_BIF + beq- 1f + NBIF_RET(2) +1: /* workaround for bc:s small offset operand */ + b CSYM(nbif_2_simple_exception) + HANDLE_GOT_MBUF(2) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(standard_bif_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,3,0) + NBIF_ARG(r5,3,1) + NBIF_ARG(r6,3,2) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + CMPI r3, THE_NON_VALUE + RESTORE_CONTEXT_BIF + beq- 1f + NBIF_RET(3) +1: /* workaround for bc:s small offset operand */ + b CSYM(nbif_3_simple_exception) + HANDLE_GOT_MBUF(3) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +/* + * fail_bif_interface_0(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 0 parameters and + * standard failure mode. 
+ */ +define(fail_bif_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + CMPI r3, THE_NON_VALUE + RESTORE_CONTEXT_BIF + beq- 1f + NBIF_RET(0) +1: /* workaround for bc:s small offset operand */ + b CSYM(nbif_0_simple_exception) + HANDLE_GOT_MBUF(0) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +/* + * gc_bif_interface_0(nbif_name, cbif_name) + * gc_bif_interface_1(nbif_name, cbif_name) + * gc_bif_interface_2(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 0-2 parameters and + * standard failure mode. + * The BIF may do a GC. + */ +define(gc_bif_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. */ + RESTORE_CONTEXT_GC + NBIF_RET(0) + HANDLE_GOT_MBUF(0) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(gc_bif_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + CMPI r3, THE_NON_VALUE + RESTORE_CONTEXT_GC + beq- 1f + NBIF_RET(1) +1: /* workaround for bc:s small offset operand */ + b CSYM(nbif_1_simple_exception) + HANDLE_GOT_MBUF(1) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(gc_bif_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. 
*/ + mr r3, P + NBIF_ARG(r4,2,0) + NBIF_ARG(r5,2,1) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + CMPI r3, THE_NON_VALUE + RESTORE_CONTEXT_GC + beq- 1f + NBIF_RET(2) +1: /* workaround for bc:s small offset operand */ + b CSYM(nbif_2_simple_exception) + HANDLE_GOT_MBUF(2) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +/* + * gc_nofail_primop_interface_1(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 1 ordinary parameter and no failure mode. + * The primop may do a GC. + */ +define(gc_nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + bl CSYM($2) + + /* Restore registers. */ + RESTORE_CONTEXT_GC + NBIF_RET(1) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +/* + * nofail_primop_interface_0(nbif_name, cbif_name) + * nofail_primop_interface_1(nbif_name, cbif_name) + * nofail_primop_interface_2(nbif_name, cbif_name) + * nofail_primop_interface_3(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 0-3 ordinary parameters and no failure mode. + * Also used for guard BIFs. + */ +define(nofail_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. */ + RESTORE_CONTEXT_BIF + NBIF_RET(0) + HANDLE_GOT_MBUF(0) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. 
*/ + mr r3, P + NBIF_ARG(r4,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. */ + RESTORE_CONTEXT_BIF + NBIF_RET(1) + HANDLE_GOT_MBUF(1) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(nofail_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,2,0) + NBIF_ARG(r5,2,1) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. */ + RESTORE_CONTEXT_BIF + NBIF_RET(2) + HANDLE_GOT_MBUF(2) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(nofail_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,3,0) + NBIF_ARG(r5,3,1) + NBIF_ARG(r6,3,2) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + bl CSYM($2) + TEST_GOT_MBUF + + /* Restore registers. */ + RESTORE_CONTEXT_BIF + NBIF_RET(3) + HANDLE_GOT_MBUF(3) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +/* + * nocons_nofail_primop_interface_0(nbif_name, cbif_name) + * nocons_nofail_primop_interface_1(nbif_name, cbif_name) + * nocons_nofail_primop_interface_2(nbif_name, cbif_name) + * nocons_nofail_primop_interface_3(nbif_name, cbif_name) + * nocons_nofail_primop_interface_5(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 0-3 or 5 ordinary parameters, and no failure mode. + * The primop cannot CONS or gc. + */ +define(nocons_nofail_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + + /* Perform a quick save;call;restore;ret sequence. 
*/ + QUICK_CALL_RET(CSYM($2),0) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(nocons_nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,1,0) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET(CSYM($2),1) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(nocons_nofail_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,2,0) + NBIF_ARG(r5,2,1) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET(CSYM($2),2) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(nocons_nofail_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,3,0) + NBIF_ARG(r5,3,1) + NBIF_ARG(r6,3,2) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET(CSYM($2),3) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(nocons_nofail_primop_interface_5, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + mr r3, P + NBIF_ARG(r4,5,0) + NBIF_ARG(r5,5,1) + NBIF_ARG(r6,5,2) + NBIF_ARG(r7,5,3) + NBIF_ARG(r8,5,4) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET(CSYM($2),5) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +/* + * noproc_primop_interface_0(nbif_name, cbif_name) + * noproc_primop_interface_1(nbif_name, cbif_name) + * noproc_primop_interface_2(nbif_name, cbif_name) + * noproc_primop_interface_3(nbif_name, cbif_name) + * noproc_primop_interface_5(nbif_name, cbif_name) + * + * Generate native interface for a primop with no implicit P + * parameter, 0-3 or 5 ordinary parameters, and no failure mode. + * The primop cannot CONS or gc. 
+ */ +define(noproc_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* XXX: this case is always trivial; how to suppress the branch? */ + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET(CSYM($2),0) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(noproc_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + NBIF_ARG(r3,1,0) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET(CSYM($2),1) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(noproc_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + NBIF_ARG(r3,2,0) + NBIF_ARG(r4,2,1) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET(CSYM($2),2) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(noproc_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + NBIF_ARG(r3,3,0) + NBIF_ARG(r4,3,1) + NBIF_ARG(r5,3,2) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET(CSYM($2),3) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(noproc_primop_interface_5, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + GLOBAL(ASYM($1)) +ASYM($1): + /* Set up C argument registers. */ + NBIF_ARG(r3,5,0) + NBIF_ARG(r4,5,1) + NBIF_ARG(r5,5,2) + NBIF_ARG(r6,5,3) + NBIF_ARG(r7,5,4) + + /* Perform a quick save;call;restore;ret sequence. 
*/ + QUICK_CALL_RET(CSYM($2),5) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +include(`hipe/hipe_bif_list.m4') + +`#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif' diff --git a/erts/emulator/hipe/hipe_ppc_gc.h b/erts/emulator/hipe/hipe_ppc_gc.h new file mode 100644 index 0000000000..796ebeb20a --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc_gc.h @@ -0,0 +1,29 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * Stack walking helpers for native stack GC procedures. + * PowerPC version. + */ +#ifndef HIPE_PPC_GC_H +#define HIPE_PPC_GC_H + +#include "hipe_ppc_asm.h" /* for NR_ARG_REGS */ +#include "hipe_risc_gc.h" + +#endif /* HIPE_PPC_GC_H */ diff --git a/erts/emulator/hipe/hipe_ppc_glue.S b/erts/emulator/hipe/hipe_ppc_glue.S new file mode 100644 index 0000000000..97b07353f9 --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc_glue.S @@ -0,0 +1,582 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. 
+ * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#include "hipe_ppc_asm.h" +#include "hipe_literals.h" +#define ASM +#include "hipe_mode_switch.h" + + .text + .p2align 2 + +#if defined(__powerpc64__) +/* + * Enter Erlang from C. + * Create a new frame on the C stack. + * Save C callee-save registers (r14-r31) in the frame. + * Save r0 (C return address) in the caller's LR save slot. + * Retrieve the process pointer from the C argument registers. + * Return to LR. + * Do not clobber the C argument registers (r3-r10). + * + * Usage: mflr r0 SEMI bl .enter + */ +.enter: + # Our PPC64 ELF ABI frame must include: + # - 48 (6*8) bytes for AIX-like linkage area + # - 64 (8*8) bytes for AIX-like parameter area for + # recursive C calls with up to 8 parameter words + # - padding to make the frame a multiple of 16 bytes + # - 144 (18*8) bytes for saving r14-r31 + # The final size is 256 bytes. + # stdu is required for atomic alloc+init + stdu r1,-256(r1) /* 0(r1) contains r1+256 */ + std r14, 112(r1) + std r15, 120(r1) + std r16, 128(r1) + std r17, 136(r1) + std r18, 144(r1) + std r19, 152(r1) + std r20, 160(r1) + std r21, 168(r1) + std r22, 176(r1) + std r23, 184(r1) + std r24, 192(r1) + std r25, 200(r1) + std r26, 208(r1) + std r27, 216(r1) + std r28, 224(r1) + std r29, 232(r1) + std r30, 240(r1) + std r31, 248(r1) + std r0, 256+16(r1) /* caller saved LR in r0 */ + mr P, r3 /* get the process pointer */ + blr + +/* + * Return to the calling C function. + * The return value is in r3. + * + * .nosave_exit saves no state + * .flush_exit saves NSP and other cached P state. + * .suspend_exit also saves RA. 
+ */ +.suspend_exit: + /* save RA, so we can be resumed */ + mflr r0 + std r0, P_NRA(P) +.flush_exit: + /* flush cached P state */ + SAVE_CACHED_STATE +.nosave_exit: + /* restore callee-save registers, drop frame, return */ + ld r0, 256+16(r1) + mtlr r0 + ld r14, 112(r1) + ld r15, 120(r1) + ld r16, 128(r1) + ld r17, 136(r1) + ld r18, 144(r1) + ld r19, 152(r1) + ld r20, 160(r1) + ld r21, 168(r1) + ld r22, 176(r1) + ld r23, 184(r1) + ld r24, 192(r1) + ld r25, 200(r1) + ld r26, 208(r1) + ld r27, 216(r1) + ld r28, 224(r1) + ld r29, 232(r1) /* kills HP */ + ld r30, 240(r1) /* kills NSP */ + ld r31, 248(r1) /* kills P */ + addi r1, r1, 256 + blr +#else /* !__powerpc64__ */ +/* + * Enter Erlang from C. + * Create a new frame on the C stack. + * Save C callee-save registers (r14-r31) in the frame. + * Save r0 (C return address) in the frame's LR save slot. + * Retrieve the process pointer from the C argument registers. + * Return to LR. + * Do not clobber the C argument registers (r3-r10). + * + * Usage: mflr r0 SEMI bl .enter + */ +.enter: + # A unified Linux/OSX C frame must include: + # - 24 bytes for AIX/OSX-like linkage area + # - 28 bytes for AIX/OSX-like parameter area for + # recursive C calls with up to 7 parameter words + # - 76 bytes for saving r14-r31 and LR + # - padding to make it a multiple of 16 bytes + # The final size is 128 bytes. + # stwu is required for atomic alloc+init + stwu r1,-128(r1) /* 0(r1) contains r1+128 */ + stw r14, 52(r1) + stw r15, 56(r1) + stw r16, 60(r1) + stw r17, 64(r1) + stw r18, 68(r1) + stw r19, 72(r1) + stw r20, 76(r1) + stw r21, 80(r1) + stw r22, 84(r1) + stw r23, 88(r1) + stw r24, 92(r1) + stw r25, 96(r1) + stw r26, 100(r1) + stw r27, 104(r1) + stw r28, 108(r1) + stw r29, 112(r1) + stw r30, 116(r1) + stw r31, 120(r1) + stw r0, 124(r1) /* caller saved LR in r0 */ + mr P, r3 /* get the process pointer */ + blr + +/* + * Return to the calling C function. + * The return value is in r3. 
+ * + * .nosave_exit saves no state + * .flush_exit saves NSP and other cached P state. + * .suspend_exit also saves RA. + */ +.suspend_exit: + /* save RA, so we can be resumed */ + mflr r0 + stw r0, P_NRA(P) +.flush_exit: + /* flush cached P state */ + SAVE_CACHED_STATE +.nosave_exit: + /* restore callee-save registers, drop frame, return */ + lwz r0, 124(r1) + mtlr r0 + lwz r14, 52(r1) + lwz r15, 56(r1) + lwz r16, 60(r1) + lwz r17, 64(r1) + lwz r18, 68(r1) + lwz r19, 72(r1) + lwz r20, 76(r1) + lwz r21, 80(r1) + lwz r22, 84(r1) + lwz r23, 88(r1) + lwz r24, 92(r1) + lwz r25, 96(r1) + lwz r26, 100(r1) + lwz r27, 104(r1) + lwz r28, 108(r1) + lwz r29, 112(r1) /* kills HP */ + lwz r30, 116(r1) /* kills NSP */ + lwz r31, 120(r1) /* kills P */ + addi r1, r1, 128 + blr +#endif /* !__powerpc64__ */ + +/* + * int hipe_ppc_call_to_native(Process *p); + * Emulated code recursively calls native code. + */ + GLOBAL(CSYM(hipe_ppc_call_to_native)) +CSYM(hipe_ppc_call_to_native): + /* save C context */ + mflr r0 + bl .enter + /* prepare to call the target */ + LOAD r0, P_NCALLEE(P) + mtctr r0 + /* get argument registers */ + LOAD_ARG_REGS + /* cache some P state in registers */ + RESTORE_CACHED_STATE + /* call the target */ + bctrl /* defines LR (a.k.a. NRA) */ +/* FALLTHROUGH + * + * We export this return address so that hipe_mode_switch() can discover + * when native code tailcalls emulated code. + * + * This is where native code returns to emulated code. + */ + GLOBAL(ASYM(nbif_return)) +ASYM(nbif_return): + STORE r3, P_ARG0(P) /* save retval */ + li r3, HIPE_MODE_SWITCH_RES_RETURN + b .flush_exit + +/* + * int hipe_ppc_return_to_native(Process *p); + * Emulated code returns to its native code caller. 
+ */ + GLOBAL(CSYM(hipe_ppc_return_to_native)) +CSYM(hipe_ppc_return_to_native): + /* save C context */ + mflr r0 + bl .enter + /* restore return address */ + LOAD r0, P_NRA(P) + mtlr r0 + /* cache some P state in registers */ + RESTORE_CACHED_STATE + /* get return value */ + LOAD r3, P_ARG0(P) + /* + * Return using the current return address. + * The parameters were popped at the original native-to-emulated + * call (hipe_call_from_native_is_recursive), so a plain ret suffices. + */ + blr + +/* + * int hipe_ppc_tailcall_to_native(Process *p); + * Emulated code tailcalls native code. + */ + GLOBAL(CSYM(hipe_ppc_tailcall_to_native)) +CSYM(hipe_ppc_tailcall_to_native): + /* save C context */ + mflr r0 + bl .enter + /* prepare to call the target */ + LOAD r0, P_NCALLEE(P) + mtctr r0 + /* get argument registers */ + LOAD_ARG_REGS + /* restore return address */ + LOAD r0, P_NRA(P) + mtlr r0 + /* cache some P state in registers */ + RESTORE_CACHED_STATE + /* call the target */ + bctr + +/* + * int hipe_ppc_throw_to_native(Process *p); + * Emulated code throws an exception to its native code caller. + */ + GLOBAL(CSYM(hipe_ppc_throw_to_native)) +CSYM(hipe_ppc_throw_to_native): + /* save C context */ + mflr r0 + bl .enter + /* prepare to invoke handler */ + LOAD r0, P_NCALLEE(P) /* set by hipe_find_handler() */ + mtctr r0 + /* cache some P state in registers */ + RESTORE_CACHED_STATE + /* invoke the handler */ + bctr + +/* + * Native code calls emulated code via a stub + * which should look as follows: + * + * stub for f/N: + * <set r12 to f's BEAM code address> + * <set r0 to N> + * b nbif_callemu + * + * The stub may need to create &nbif_callemu as a 32-bit immediate + * in a scratch register if the branch needs a trampoline. 
The code + * for creating a 32-bit immediate in r0 is potentially slower than + * for other registers (an add must be replaced by an or, and adds + * are potentially faster than ors), so it is better to use r0 for + * the arity (a small immediate), making r11 available for trampolines. + * (See "The PowerPC Compiler Writer's Guide", section 3.2.3.1.) + * + * XXX: Different stubs for different number of register parameters? + */ + GLOBAL(ASYM(nbif_callemu)) +ASYM(nbif_callemu): + STORE r12, P_BEAM_IP(P) + STORE r0, P_ARITY(P) + STORE_ARG_REGS + li r3, HIPE_MODE_SWITCH_RES_CALL + b .suspend_exit + +/* + * nbif_apply + * Return to the calling C function with result code + * HIPE_MODE_SWITCH_RES_APPLY in r3. + * Exits via .suspend_exit, which also saves RA so that the + * native caller can be resumed. + */ + GLOBAL(ASYM(nbif_apply)) +ASYM(nbif_apply): + STORE_ARG_REGS + li r3, HIPE_MODE_SWITCH_RES_APPLY + b .suspend_exit + +/* + * Native code calls an emulated-mode closure via a stub defined below. + * + * The closure is appended as the last actual parameter, and parameters + * beyond the first few passed in registers are pushed onto the stack in + * left-to-right order. + * Hence, the location of the closure parameter only depends on the number + * of parameters in registers, not the total number of parameters. 
+ */ +#if NR_ARG_REGS >= 6 + GLOBAL(ASYM(nbif_ccallemu6)) +ASYM(nbif_ccallemu6): + STORE ARG5, P_ARG5(P) +#if NR_ARG_REGS > 6 + mr ARG5, ARG6 +#else + LOAD ARG5, 0(NSP) +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 5 + GLOBAL(ASYM(nbif_ccallemu5)) +ASYM(nbif_ccallemu5): + STORE ARG4, P_ARG4(P) +#if NR_ARG_REGS > 5 + mr ARG4, ARG5 +#else + LOAD ARG4, 0(NSP) +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 4 + GLOBAL(ASYM(nbif_ccallemu4)) +ASYM(nbif_ccallemu4): + STORE ARG3, P_ARG3(P) +#if NR_ARG_REGS > 4 + mr ARG3, ARG4 +#else + LOAD ARG3, 0(NSP) +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 3 + GLOBAL(ASYM(nbif_ccallemu3)) +ASYM(nbif_ccallemu3): + STORE ARG2, P_ARG2(P) +#if NR_ARG_REGS > 3 + mr ARG2, ARG3 +#else + LOAD ARG2, 0(NSP) +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 2 + GLOBAL(ASYM(nbif_ccallemu2)) +ASYM(nbif_ccallemu2): + STORE ARG1, P_ARG1(P) +#if NR_ARG_REGS > 2 + mr ARG1, ARG2 +#else + LOAD ARG1, 0(NSP) +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 1 + GLOBAL(ASYM(nbif_ccallemu1)) +ASYM(nbif_ccallemu1): + STORE ARG0, P_ARG0(P) +#if NR_ARG_REGS > 1 + mr ARG0, ARG1 +#else + LOAD ARG0, 0(NSP) +#endif + /*FALLTHROUGH*/ +#endif + + GLOBAL(ASYM(nbif_ccallemu0)) +ASYM(nbif_ccallemu0): + /* We use r4 not ARG0 here because ARG0 is not + defined when NR_ARG_REGS == 0. */ +#if NR_ARG_REGS == 0 + LOAD r4, 0(NSP) /* get the closure */ +#endif + STORE r4, P_CLOSURE(P) /* save the closure */ + li r3, HIPE_MODE_SWITCH_RES_CALL_CLOSURE + b .suspend_exit + +/* + * This is where native code suspends. 
+ */ + GLOBAL(ASYM(nbif_suspend_0)) +ASYM(nbif_suspend_0): + li r3, HIPE_MODE_SWITCH_RES_SUSPEND + b .suspend_exit + +/* + * Suspend from a receive (waiting for a message) + */ + GLOBAL(ASYM(nbif_suspend_msg)) +ASYM(nbif_suspend_msg): + li r3, HIPE_MODE_SWITCH_RES_WAIT + b .suspend_exit + +/* + * Suspend from a receive with a timeout (waiting for a message) + * if (!(p->flags & F_TIMO)) { suspend } + * else { return 0; } + */ + GLOBAL(ASYM(nbif_suspend_msg_timeout)) +ASYM(nbif_suspend_msg_timeout): + LOAD r4, P_FLAGS(P) + li r3, HIPE_MODE_SWITCH_RES_WAIT_TIMEOUT + /* this relies on F_TIMO (1<<2) fitting in a uimm16 */ + andi. r0, r4, F_TIMO + beq- .suspend_exit /* sees the CR state from andi. above */ + /* timeout has occurred */ + li r3, 0 + blr + +/* + * This is the default exception handler for native code. + */ + GLOBAL(ASYM(nbif_fail)) +ASYM(nbif_fail): + li r3, HIPE_MODE_SWITCH_RES_THROW + b .flush_exit /* no need to save RA */ + + GLOBAL(CSYM(nbif_0_gc_after_bif)) + GLOBAL(CSYM(nbif_1_gc_after_bif)) + GLOBAL(CSYM(nbif_2_gc_after_bif)) + GLOBAL(CSYM(nbif_3_gc_after_bif)) +CSYM(nbif_0_gc_after_bif): + li r4, 0 + b .gc_after_bif +CSYM(nbif_1_gc_after_bif): + li r4, 1 + b .gc_after_bif +CSYM(nbif_2_gc_after_bif): + li r4, 2 + b .gc_after_bif +CSYM(nbif_3_gc_after_bif): + li r4, 3 + /*FALLTHROUGH*/ +.gc_after_bif: + stw r4, P_NARITY(P) /* Note: narity is a 32-bit field */ + STORE TEMP_LR, P_NRA(P) + STORE NSP, P_NSP(P) + mflr TEMP_LR + mr r4, r3 + mr r3, P + bl CSYM(erts_gc_after_bif_call) + mtlr TEMP_LR + LOAD TEMP_LR, P_NRA(P) + li r4, 0 + stw r4, P_NARITY(P) /* Note: narity is a 32-bit field */ + blr + +/* + * We end up here when a BIF called from native signals an + * exceptional condition. + * The heap pointer was just read from P. 
+ * TEMP_LR contains a copy of LR + */ + GLOBAL(CSYM(nbif_0_simple_exception)) +CSYM(nbif_0_simple_exception): + li r4, 0 + b .nbif_simple_exception + GLOBAL(CSYM(nbif_1_simple_exception)) +CSYM(nbif_1_simple_exception): + li r4, 1 + b .nbif_simple_exception + GLOBAL(CSYM(nbif_2_simple_exception)) +CSYM(nbif_2_simple_exception): + li r4, 2 + b .nbif_simple_exception + GLOBAL(CSYM(nbif_3_simple_exception)) +CSYM(nbif_3_simple_exception): + li r4, 3 + /*FALLTHROUGH*/ +.nbif_simple_exception: + LOAD r3, P_FREASON(P) + CMPI r3, FREASON_TRAP + beq- .handle_trap + /* + * Find and invoke catch handler (it must exist). + * The heap pointer was just read from P. + * TEMP_LR should contain the current call's return address. + * r4 should contain the current call's arity. + */ + STORE NSP, P_NSP(P) + STORE TEMP_LR, P_NRA(P) + stw r4, P_NARITY(P) /* Note: narity is a 32-bit field */ + /* find and prepare to invoke the handler */ + mr r3, P + bl CSYM(hipe_handle_exception) /* Note: hipe_handle_exception() conses */ + /* prepare to invoke handler */ + LOAD r0, P_NCALLEE(P) /* set by hipe_find_handler() */ + mtctr r0 + RESTORE_CACHED_STATE /* NSP updated by hipe_find_handler() */ + /* now invoke the handler */ + bctr + + /* + * A BIF failed with freason TRAP: + * - the BIF's arity is in r4 + * - the native RA was saved in TEMP_LR before the BIF call + * - the native heap/stack/reds registers are saved in P + * Save NSP, the arity and the RA in P, then return to C with + * result code HIPE_MODE_SWITCH_RES_TRAP via .nosave_exit. + */ +.handle_trap: + li r3, HIPE_MODE_SWITCH_RES_TRAP + STORE NSP, P_NSP(P) + stw r4, P_NARITY(P) /* Note: narity is a 32-bit field */ + STORE TEMP_LR, P_NRA(P) + b .nosave_exit + +/* + * nbif_stack_trap_ra: trap return address for maintaining + * the gray/white stack boundary + */ + GLOBAL(ASYM(nbif_stack_trap_ra)) +ASYM(nbif_stack_trap_ra): /* a return address, not a function */ + # This only handles a single return value. + # If we have more, we need to save them in the PCB. 
+ mr TEMP_ARG0, r3 /* save retval */ + STORE NSP, P_NSP(P) + mr r3, P + bl CSYM(hipe_handle_stack_trap) /* must not cons */ + mtctr r3 /* original RA */ + mr r3, TEMP_ARG0 /* restore retval */ + bctr /* resume at original RA */ + +/* + * hipe_ppc_inc_stack + * Caller saved its LR in TEMP_LR (== TEMP1) before calling us. + */ + GLOBAL(ASYM(hipe_ppc_inc_stack)) +ASYM(hipe_ppc_inc_stack): + STORE_ARG_REGS + mflr TEMP_ARG0 + STORE NSP, P_NSP(P) + mr r3, P + # hipe_inc_nstack reads and writes NSP and NSP_LIMIT, + # but does not access LR/RA, HP, or FCALLS. + bl CSYM(hipe_inc_nstack) + mtlr TEMP_ARG0 + LOAD NSP, P_NSP(P) + LOAD_ARG_REGS + blr + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/erts/emulator/hipe/hipe_ppc_glue.h b/erts/emulator/hipe/hipe_ppc_glue.h new file mode 100644 index 0000000000..dcf5ec7644 --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc_glue.h @@ -0,0 +1,32 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_PPC_GLUE_H +#define HIPE_PPC_GLUE_H + +#include "hipe_ppc_asm.h" /* for NR_ARG_REGS, PPC_LEAF_WORDS */ +#define NR_LEAF_WORDS PPC_LEAF_WORDS +#define HIPE_ARCH_CALL_TO_NATIVE hipe_ppc_call_to_native +#define HIPE_ARCH_RETURN_TO_NATIVE hipe_ppc_return_to_native +#define HIPE_ARCH_TAILCALL_TO_NATIVE hipe_ppc_tailcall_to_native +#define HIPE_ARCH_THROW_TO_NATIVE hipe_ppc_throw_to_native +#include "hipe_risc_glue.h" + +#endif /* HIPE_PPC_GLUE_H */ diff --git a/erts/emulator/hipe/hipe_ppc_primops.h b/erts/emulator/hipe/hipe_ppc_primops.h new file mode 100644 index 0000000000..67205fe1d1 --- /dev/null +++ b/erts/emulator/hipe/hipe_ppc_primops.h @@ -0,0 +1,24 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#if !defined(__powerpc64__) +PRIMOP_LIST(am_fconv_constant, &fconv_constant) +#endif +PRIMOP_LIST(am_inc_stack_0, &hipe_ppc_inc_stack) diff --git a/erts/emulator/hipe/hipe_primops.h b/erts/emulator/hipe/hipe_primops.h new file mode 100644 index 0000000000..cc2fc425d5 --- /dev/null +++ b/erts/emulator/hipe/hipe_primops.h @@ -0,0 +1,96 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. 
+ * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_PRIMOPS_H +#define HIPE_PRIMOPS_H + +PRIMOP_LIST(am_suspend_msg, &nbif_suspend_msg) +PRIMOP_LIST(am_suspend_msg_timeout, &nbif_suspend_msg_timeout) +PRIMOP_LIST(am_suspend_0, &nbif_suspend_0) + +PRIMOP_LIST(am_Plus, &nbif_add_2) +PRIMOP_LIST(am_Minus, &nbif_sub_2) +PRIMOP_LIST(am_Times, &nbif_mul_2) +PRIMOP_LIST(am_Div, &nbif_div_2) +PRIMOP_LIST(am_div, &nbif_intdiv_2) +PRIMOP_LIST(am_rem, &nbif_rem_2) +PRIMOP_LIST(am_bsl, &nbif_bsl_2) +PRIMOP_LIST(am_bsr, &nbif_bsr_2) +PRIMOP_LIST(am_band, &nbif_band_2) +PRIMOP_LIST(am_bor, &nbif_bor_2) +PRIMOP_LIST(am_bxor, &nbif_bxor_2) +PRIMOP_LIST(am_bnot, &nbif_bnot_1) + +PRIMOP_LIST(am_gc_1, &nbif_gc_1) +PRIMOP_LIST(am_check_get_msg, &nbif_check_get_msg) +#ifdef ERTS_SMP +PRIMOP_LIST(am_atomic_inc, &nbif_atomic_inc) +PRIMOP_LIST(am_clear_timeout, &nbif_clear_timeout) +#endif +PRIMOP_LIST(am_select_msg, &nbif_select_msg) +PRIMOP_LIST(am_set_timeout, &nbif_set_timeout) +PRIMOP_LIST(am_rethrow, &nbif_rethrow) + + +PRIMOP_LIST(am_bs_get_integer_2, &nbif_bs_get_integer_2) +PRIMOP_LIST(am_bs_get_float_2, &nbif_bs_get_float_2) +PRIMOP_LIST(am_bs_get_binary_2, &nbif_bs_get_binary_2) +PRIMOP_LIST(am_bs_allocate, &nbif_bs_allocate) +PRIMOP_LIST(am_bs_reallocate, &nbif_bs_reallocate) +PRIMOP_LIST(am_bs_put_big_integer, &nbif_bs_put_big_integer) +PRIMOP_LIST(am_bs_put_small_float, &nbif_bs_put_small_float) 
+PRIMOP_LIST(am_bs_put_bits, &nbif_bs_put_bits) +PRIMOP_LIST(am_bs_utf8_size, &nbif_bs_utf8_size) +PRIMOP_LIST(am_bs_put_utf8, &nbif_bs_put_utf8) +PRIMOP_LIST(am_bs_get_utf8, &nbif_bs_get_utf8) +PRIMOP_LIST(am_bs_utf16_size, &nbif_bs_utf16_size) +PRIMOP_LIST(am_bs_put_utf16be, &nbif_bs_put_utf16be) +PRIMOP_LIST(am_bs_put_utf16le, &nbif_bs_put_utf16le) +PRIMOP_LIST(am_bs_get_utf16, &nbif_bs_get_utf16) +PRIMOP_LIST(am_bs_validate_unicode, &nbif_bs_validate_unicode) +PRIMOP_LIST(am_bs_validate_unicode_retract, &nbif_bs_validate_unicode_retract) + +PRIMOP_LIST(am_cmp_2, &nbif_cmp_2) +PRIMOP_LIST(am_op_exact_eqeq_2, &nbif_eq_2) + +PRIMOP_LIST(am_hipe_apply, &nbif_apply) +PRIMOP_LIST(am_find_na_or_make_stub, &nbif_find_na_or_make_stub) +PRIMOP_LIST(am_nonclosure_address, &nbif_nonclosure_address) + +PRIMOP_LIST(am_conv_big_to_float, &nbif_conv_big_to_float) +PRIMOP_LIST(am_fclearerror_error, &nbif_fclearerror_error) + +#if defined(__sparc__) +#include "hipe_sparc_primops.h" +#endif +#if defined(__i386__) +#include "hipe_x86_primops.h" +#endif +#if defined(__x86_64__) +#include "hipe_amd64_primops.h" +#endif +#if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#include "hipe_ppc_primops.h" +#endif +#if defined(__arm__) +#include "hipe_arm_primops.h" +#endif + +#endif /* HIPE_PRIMOPS_H */ diff --git a/erts/emulator/hipe/hipe_process.h b/erts/emulator/hipe/hipe_process.h new file mode 100644 index 0000000000..5528e68826 --- /dev/null +++ b/erts/emulator/hipe/hipe_process.h @@ -0,0 +1,80 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. 
+ * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * HiPE-specific process fields + */ +#ifndef HIPE_PROCESS_H +#define HIPE_PROCESS_H + +#include "erl_alloc.h" + +struct hipe_process_state { + Eterm *nsp; /* Native stack pointer. */ + Eterm *nstack; /* Native stack block start. */ + Eterm *nstend; /* Native stack block end (start+size). */ + /* XXX: ncallee and closure could share space in a union */ + void (*ncallee)(void); /* Native code callee (label) to invoke. */ + Eterm closure; /* Used to pass a closure from native code. */ + Eterm *nstgraylim; /* Gray/white stack boundary. */ + Eterm *nstblacklim; /* Black/gray stack boundary. Must exist if + graylim exists. Ignored if no graylim. */ + void (*ngra)(void); /* Saved original RA from graylim frame. */ +#if defined(__i386__) || defined(__x86_64__) + Eterm *ncsp; /* Saved C stack pointer. */ +#endif +#if defined(__sparc__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__arm__) + void (*nra)(void); /* Native code return address. */ +#endif + unsigned int narity; /* Arity of BIF call, for stack walks. 
*/ +}; + +extern void hipe_arch_print_pcb(struct hipe_process_state *p); + +static __inline__ void hipe_init_process(struct hipe_process_state *p) +{ + p->nsp = NULL; + p->nstack = NULL; + p->nstend = NULL; + p->nstgraylim = NULL; + p->nstblacklim = NULL; + p->ngra = NULL; +#if defined(__sparc__) || defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__arm__) + p->nra = NULL; +#endif + p->narity = 0; +} + +static __inline__ void hipe_delete_process(struct hipe_process_state *p) +{ + if (p->nstack) + erts_free(ERTS_ALC_T_HIPE, (void*)p->nstack); +} + +#ifdef ERTS_SMP +struct hipe_process_state_smp { + int have_receive_locks; +}; + +static __inline__ void hipe_init_process_smp(struct hipe_process_state_smp *p) +{ + p->have_receive_locks = 0; +} +#endif + +#endif /* HIPE_PROCESS_H */ diff --git a/erts/emulator/hipe/hipe_risc_gc.h b/erts/emulator/hipe/hipe_risc_gc.h new file mode 100644 index 0000000000..4a9a7878f0 --- /dev/null +++ b/erts/emulator/hipe/hipe_risc_gc.h @@ -0,0 +1,113 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2008-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * Stack walking helpers for native stack GC procedures. + * Generic RISC version. 
+ */ +#ifndef HIPE_RISC_GC_H +#define HIPE_RISC_GC_H + +/* arch wrapper includes hipe_${arch}_asm.h to define NR_ARG_REGS */ + +struct nstack_walk_state { + const struct sdesc *sdesc0; /* .sdesc0 must be a pointer rvalue */ +}; + +static inline int nstack_walk_init_check(const Process *p) +{ + return p->hipe.nra ? 1 : 0; +} + +static inline Eterm *nstack_walk_nsp_begin(const Process *p) +{ + unsigned int nstkarity = p->hipe.narity - NR_ARG_REGS; + if ((int)nstkarity < 0) + nstkarity = 0; + return p->hipe.nsp + nstkarity; +} + +static inline const struct sdesc* +nstack_walk_init_sdesc(const Process *p, struct nstack_walk_state *state) +{ + const struct sdesc *sdesc = hipe_find_sdesc((unsigned long)p->hipe.nra); + state->sdesc0 = sdesc; + return sdesc; +} + +static inline void nstack_walk_update_trap(Process *p, const struct sdesc *sdesc0) +{ + Eterm *nsp = p->hipe.nsp; + p->hipe.nsp = nstack_walk_nsp_begin(p); + hipe_update_stack_trap(p, sdesc0); + p->hipe.nsp = nsp; +} + +static inline Eterm *nstack_walk_nsp_end(const Process *p) +{ + return p->hipe.nstend - 1; +} + +static inline void nstack_walk_kill_trap(Process *p, Eterm *nsp_end) +{ + /* remove gray/white boundary trap */ + if ((unsigned long)p->hipe.nra == (unsigned long)&nbif_stack_trap_ra) { + p->hipe.nra = p->hipe.ngra; + } else { + for (;;) { + --nsp_end; + if (nsp_end[0] == (unsigned long)&nbif_stack_trap_ra) { + nsp_end[0] = (unsigned long)p->hipe.ngra; + break; + } + } + } +} + +static inline int nstack_walk_gray_passed_black(const Eterm *gray, const Eterm *black) +{ + return gray > black; +} + +static inline int nstack_walk_nsp_reached_end(const Eterm *nsp, const Eterm *nsp_end) +{ + return nsp >= nsp_end; +} + +static inline unsigned int nstack_walk_frame_size(const struct sdesc *sdesc) +{ + return sdesc_fsize(sdesc) + 1 + sdesc_arity(sdesc); +} + +static inline Eterm *nstack_walk_frame_index(Eterm *nsp, unsigned int i) +{ + return &nsp[i]; +} + +static inline unsigned long 
+nstack_walk_frame_ra(const Eterm *nsp, const struct sdesc *sdesc) +{ + return nsp[sdesc_fsize(sdesc)]; +} + +static inline Eterm *nstack_walk_next_frame(Eterm *nsp, unsigned int sdesc_size) +{ + return nsp + sdesc_size; +} + +#endif /* HIPE_RISC_GC_H */ diff --git a/erts/emulator/hipe/hipe_risc_glue.h b/erts/emulator/hipe/hipe_risc_glue.h new file mode 100644 index 0000000000..3b2d6498d3 --- /dev/null +++ b/erts/emulator/hipe/hipe_risc_glue.h @@ -0,0 +1,266 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2008-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_RISC_GLUE_H +#define HIPE_RISC_GLUE_H + +/* arch wrapper does: + * #include "hipe_${arch}_asm.h" // for NR_ARG_REGS, ${ARCH}_LEAF_WORDS + * #define NR_LEAF_WORDS ${ARCH}_LEAF_WORDS + * #define HIPE_ARCH_CALL_TO_NATIVE hipe_${arch}_call_to_native + * #define HIPE_ARCH_RETURN_TO_NATIVE hipe_${arch}_return_to_native + * #define HIPE_ARCH_TAILCALL_TO_NATIVE hipe_${arch}_tailcall_to_native + * #define HIPE_ARCH_THROW_TO_NATIVE hipe_${arch}_throw_to_native + * #include "hipe_risc_glue.h" + */ + +/* Emulated code recursively calls native code. + The return address is `nbif_return', which is exported so that + tailcalls from native to emulated code can be identified. 
*/ +unsigned int HIPE_ARCH_CALL_TO_NATIVE(Process*); +AEXTERN(void,nbif_return,(void)); + +/* Native-mode stubs for calling emulated-mode closures. */ +AEXTERN(void,nbif_ccallemu0,(void)); +AEXTERN(void,nbif_ccallemu1,(void)); +AEXTERN(void,nbif_ccallemu2,(void)); +AEXTERN(void,nbif_ccallemu3,(void)); +AEXTERN(void,nbif_ccallemu4,(void)); +AEXTERN(void,nbif_ccallemu5,(void)); +AEXTERN(void,nbif_ccallemu6,(void)); + +/* Default exception handler for native code. */ +AEXTERN(void,nbif_fail,(void)); + +/* Emulated code returns to its native code caller. */ +unsigned int HIPE_ARCH_RETURN_TO_NATIVE(Process*); + +/* Emulated code tailcalls native code. */ +unsigned int HIPE_ARCH_TAILCALL_TO_NATIVE(Process*); + +/* Emulated code throws an exception to its native code caller. */ +unsigned int HIPE_ARCH_THROW_TO_NATIVE(Process*); + +static __inline__ unsigned int max(unsigned int x, unsigned int y) +{ + return (x > y) ? x : y; +} + +static __inline__ void hipe_arch_glue_init(void) +{ + static struct sdesc_with_exnra nbif_return_sdesc = { + .exnra = (unsigned long)&nbif_fail, + .sdesc = { + .bucket = { .hvalue = (unsigned long)&nbif_return }, + .summary = (1<<8), + }, + }; + hipe_init_sdesc_table(&nbif_return_sdesc.sdesc); +} + +static __inline__ void hipe_push_risc_nra_frame(Process *p) +{ + p->hipe.nsp -= 1; + p->hipe.nsp[0] = (Eterm)p->hipe.nra; +} + +static __inline__ void hipe_pop_risc_nra_frame(Process *p) +{ + p->hipe.nra = (void(*)(void))p->hipe.nsp[0]; + p->hipe.nsp += 1; +} + +/* PRE: arity <= NR_ARG_REGS */ +static __inline__ void +hipe_write_risc_regs(Process *p, unsigned int arity, Eterm reg[]) +{ +#if NR_ARG_REGS > 0 + int i; + for (i = arity; --i >= 0;) + p->def_arg_reg[i] = reg[i]; +#endif +} + +/* PRE: arity <= NR_ARG_REGS */ +static __inline__ void +hipe_read_risc_regs(Process *p, unsigned int arity, Eterm reg[]) +{ +#if NR_ARG_REGS > 0 + int i; + for (i = arity; --i >= 0;) + reg[i] = p->def_arg_reg[i]; +#endif +} + +static __inline__ void 
+hipe_push_risc_params(Process *p, unsigned int arity, Eterm reg[]) +{ + unsigned int i; + + i = arity; + if (i > NR_ARG_REGS) { + Eterm *nsp = p->hipe.nsp; + i = NR_ARG_REGS; + do { + *--nsp = reg[i++]; + } while (i < arity); + p->hipe.nsp = nsp; + i = NR_ARG_REGS; + } + /* INV: i <= NR_ARG_REGS */ + hipe_write_risc_regs(p, i, reg); +} + +static __inline__ void +hipe_pop_risc_params(Process *p, unsigned int arity, Eterm reg[]) +{ + unsigned int i; + + i = arity; + if (i > NR_ARG_REGS) { + Eterm *nsp = p->hipe.nsp; + do { + reg[--i] = *nsp++; + } while (i > NR_ARG_REGS); + p->hipe.nsp = nsp; + /* INV: i == NR_ARG_REGS */ + } + /* INV: i <= NR_ARG_REGS */ + hipe_read_risc_regs(p, i, reg); +} + +/* BEAM recursively calls native code. */ +static __inline__ unsigned int +hipe_call_to_native(Process *p, unsigned int arity, Eterm reg[]) +{ + int nstkargs; + + if ((nstkargs = arity - NR_ARG_REGS) < 0) + nstkargs = 0; + hipe_check_nstack(p, max(nstkargs + 1, NR_LEAF_WORDS)); + hipe_push_risc_nra_frame(p); /* needs 1 word */ + hipe_push_risc_params(p, arity, reg); /* needs nstkargs words */ + return HIPE_ARCH_CALL_TO_NATIVE(p); +} + +/* Native called BEAM, which now tailcalls native. */ +static __inline__ unsigned int +hipe_tailcall_to_native(Process *p, unsigned int arity, Eterm reg[]) +{ + int nstkargs; + + if ((nstkargs = arity - NR_ARG_REGS) < 0) + nstkargs = 0; + hipe_check_nstack(p, max(nstkargs, NR_LEAF_WORDS)); + hipe_push_risc_params(p, arity, reg); /* needs nstkargs words */ + return HIPE_ARCH_TAILCALL_TO_NATIVE(p); +} + +/* BEAM called native, which has returned. Clean up. */ +static __inline__ void hipe_return_from_native(Process *p) +{ + hipe_pop_risc_nra_frame(p); +} + +/* BEAM called native, which has thrown an exception. Clean up. */ +static __inline__ void hipe_throw_from_native(Process *p) +{ + hipe_pop_risc_nra_frame(p); +} + +/* BEAM called native, which now calls BEAM. + Move the parameters to reg[]. 
+ Return zero if this is a tailcall, non-zero if the call is recursive. + If tailcall, also clean up native stub continuation. */ +static __inline__ int +hipe_call_from_native_is_recursive(Process *p, Eterm reg[]) +{ + hipe_pop_risc_params(p, p->arity, reg); + if (p->hipe.nra != (void(*)(void))&nbif_return) + return 1; + hipe_pop_risc_nra_frame(p); + return 0; +} + +/* Native makes a call which needs to unload the parameters. + This differs from hipe_call_from_native_is_recursive() in + that it doesn't check for or pop the BEAM-calls-native frame. + It's currently only used in the implementation of apply. */ +static __inline__ void +hipe_pop_params(Process *p, unsigned int arity, Eterm reg[]) +{ + hipe_pop_risc_params(p, arity, reg); +} + +/* Native called BEAM, which now returns back to native. */ +static __inline__ unsigned int hipe_return_to_native(Process *p) +{ + return HIPE_ARCH_RETURN_TO_NATIVE(p); +} + +/* Native called BEAM, which now throws an exception back to native. */ +static __inline__ unsigned int hipe_throw_to_native(Process *p) +{ + return HIPE_ARCH_THROW_TO_NATIVE(p); +} + +/* Return the address of a stub switching a native closure call to BEAM. 
*/ +static __inline__ const void *hipe_closure_stub_address(unsigned int arity) +{ +#if NR_ARG_REGS == 0 + return &nbif_ccallemu0; +#else /* > 0 */ + switch (arity) { + case 0: return &nbif_ccallemu0; +#if NR_ARG_REGS == 1 + default: return &nbif_ccallemu1; +#else /* > 1 */ + case 1: return &nbif_ccallemu1; +#if NR_ARG_REGS == 2 + default: return &nbif_ccallemu2; +#else /* > 2 */ + case 2: return &nbif_ccallemu2; +#if NR_ARG_REGS == 3 + default: return &nbif_ccallemu3; +#else /* > 3 */ + case 3: return &nbif_ccallemu3; +#if NR_ARG_REGS == 4 + default: return &nbif_ccallemu4; +#else /* > 4 */ + case 4: return &nbif_ccallemu4; +#if NR_ARG_REGS == 5 + default: return &nbif_ccallemu5; +#else /* > 5 */ + case 5: return &nbif_ccallemu5; +#if NR_ARG_REGS == 6 + default: return &nbif_ccallemu6; +#else +#error "NR_ARG_REGS > 6 NOT YET IMPLEMENTED" +#endif /* > 6 */ +#endif /* > 5 */ +#endif /* > 4 */ +#endif /* > 3 */ +#endif /* > 2 */ +#endif /* > 1 */ + } +#endif /* > 0 */ +} + +#endif /* HIPE_RISC_GLUE_H */ diff --git a/erts/emulator/hipe/hipe_risc_stack.c b/erts/emulator/hipe/hipe_risc_stack.c new file mode 100644 index 0000000000..976ca0b85d --- /dev/null +++ b/erts/emulator/hipe/hipe_risc_stack.c @@ -0,0 +1,312 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2008-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "global.h" +#include "bif.h" +#include "hipe_stack.h" + +/* get NR_ARG_REGS from the arch */ +#if defined(__arm__) +#include "hipe_arm_asm.h" +#elif defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +#include "hipe_ppc_asm.h" +#elif defined(__sparc__) +#include "hipe_sparc_asm.h" +#endif + +AEXTERN(void,nbif_fail,(void)); +AEXTERN(void,nbif_stack_trap_ra,(void)); + +/* + * hipe_print_nstack() is called from hipe_bifs:show_nstack/1. + */ +static void print_slot(Eterm *sp, unsigned int live) +{ + Eterm val = *sp; + printf(" | 0x%0*lx | 0x%0*lx | ", + 2*(int)sizeof(long), (unsigned long)sp, + 2*(int)sizeof(long), val); + if (live) + erts_printf("%.30T", val); + printf("\r\n"); +} + +void hipe_print_nstack(Process *p) +{ + Eterm *nsp; + Eterm *nsp_end; + const struct sdesc *sdesc1; + const struct sdesc *sdesc; + unsigned long ra; + unsigned long exnra; + unsigned int mask; + unsigned int sdesc_size; + unsigned int i; + unsigned int nstkarity; + static const char dashes[2*sizeof(long)+5] = { + [0 ... 
+ 2*sizeof(long)+3] = '-' + }; + + printf(" | NATIVE STACK |\r\n"); + printf(" |%s|%s|\r\n", dashes, dashes); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "heap", + 2*(int)sizeof(long), (unsigned long)p->heap); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "high_water", + 2*(int)sizeof(long), (unsigned long)p->high_water); + /* The "hend" row reports the heap end (p->hend), parallel to the old_heap/old_hend rows below. */ + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "hend", + 2*(int)sizeof(long), (unsigned long)p->hend); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "old_heap", + 2*(int)sizeof(long), (unsigned long)p->old_heap); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "old_hend", + 2*(int)sizeof(long), (unsigned long)p->old_hend); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "nsp", + 2*(int)sizeof(long), (unsigned long)p->hipe.nsp); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "nstend", + 2*(int)sizeof(long), (unsigned long)p->hipe.nstend); + printf(" | %*s| 0x%0*lx |\r\n", + 2+2*(int)sizeof(long)+1, "nstblacklim", + 2*(int)sizeof(long), (unsigned long)p->hipe.nstblacklim); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "nstgraylim", + 2*(int)sizeof(long), (unsigned long)p->hipe.nstgraylim); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "nra", + 2*(int)sizeof(long), (unsigned long)p->hipe.nra); + printf(" | %*s | 0x%0*x |\r\n", + 2+2*(int)sizeof(long), "narity", + 2*(int)sizeof(long), p->hipe.narity); + printf(" |%s|%s|\r\n", dashes, dashes); + printf(" | %*s | %*s |\r\n", + 2+2*(int)sizeof(long), "Address", + 2+2*(int)sizeof(long), "Contents"); + + ra = (unsigned long)p->hipe.nra; + if (!ra) + return; + nsp = p->hipe.nsp; + nsp_end = p->hipe.nstend - 1; + + nstkarity = p->hipe.narity - NR_ARG_REGS; + if ((int)nstkarity < 0) + nstkarity = 0; + + /* First RA not on stack. Dump current args first. 
*/ + printf(" |%s|%s|\r\n", dashes, dashes); + for (i = 0; i < nstkarity; ++i) + print_slot(&nsp[i], 1); + nsp += nstkarity; + + if (ra == (unsigned long)&nbif_stack_trap_ra) + ra = (unsigned long)p->hipe.ngra; + sdesc = hipe_find_sdesc(ra); + + for (;;) { /* INV: nsp at bottom of frame described by sdesc */ + printf(" |%s|%s|\r\n", dashes, dashes); + if (nsp >= nsp_end) { + if (nsp == nsp_end) + return; + fprintf(stderr, "%s: passed end of stack\r\n", __FUNCTION__); + break; + } + ra = nsp[sdesc_fsize(sdesc)]; + if (ra == (unsigned long)&nbif_stack_trap_ra) + sdesc1 = hipe_find_sdesc((unsigned long)p->hipe.ngra); + else + sdesc1 = hipe_find_sdesc(ra); + sdesc_size = sdesc_fsize(sdesc) + 1 + sdesc_arity(sdesc); + i = 0; + mask = sdesc->livebits[0]; + for (;;) { + if (i == sdesc_fsize(sdesc)) { + printf(" | 0x%0*lx | 0x%0*lx | ", + 2*(int)sizeof(long), (unsigned long)&nsp[i], + 2*(int)sizeof(long), ra); + if (ra == (unsigned long)&nbif_stack_trap_ra) + printf("STACK TRAP, ORIG RA 0x%lx", (unsigned long)p->hipe.ngra); + else + printf("NATIVE RA"); + if ((exnra = sdesc_exnra(sdesc1)) != 0) + printf(", EXNRA 0x%lx", exnra); + printf("\r\n"); + } else + print_slot(&nsp[i], (mask & 1)); + if (++i >= sdesc_size) + break; + if (i & 31) + mask >>= 1; + else + mask = sdesc->livebits[i >> 5]; + } + nsp += sdesc_size; + sdesc = sdesc1; + } + abort(); +} + +/* XXX: x86's values, not yet tuned for anyone else */ +#define MINSTACK 128 +#define NSKIPFRAMES 4 + +void hipe_update_stack_trap(Process *p, const struct sdesc *sdesc) +{ + Eterm *nsp; + Eterm *nsp_end; + unsigned long ra; + int n; + + nsp = p->hipe.nsp; + nsp_end = p->hipe.nstend - 1; + if ((unsigned long)((char*)nsp_end - (char*)nsp) < MINSTACK*sizeof(Eterm*)) { + p->hipe.nstgraylim = NULL; + return; + } + n = NSKIPFRAMES; + for (;;) { + nsp += sdesc_fsize(sdesc); + if (nsp >= nsp_end) { + p->hipe.nstgraylim = NULL; + return; + } + ra = nsp[0]; + if (--n <= 0) + break; + nsp += 1 + sdesc_arity(sdesc); + sdesc = 
hipe_find_sdesc(ra); + } + p->hipe.nstgraylim = nsp + 1 + sdesc_arity(sdesc); + p->hipe.ngra = (void(*)(void))ra; + nsp[0] = (unsigned long)&nbif_stack_trap_ra; +} + +/* + * hipe_handle_stack_trap() is called when the mutator returns to + * nbif_stack_trap_ra, which marks the gray/white stack boundary frame. + * The gray/white boundary is moved back one or more frames. + * + * The function head below is "interesting". + */ +void (*hipe_handle_stack_trap(Process *p))(void) +{ + void (*ngra)(void) = p->hipe.ngra; + const struct sdesc *sdesc = hipe_find_sdesc((unsigned long)ngra); + hipe_update_stack_trap(p, sdesc); + return ngra; +} + +/* + * hipe_find_handler() is called from hipe_handle_exception() to locate + * the current exception handler's PC and SP. + * The native stack MUST contain a stack frame as it appears on + * entry to a function (actuals, caller's frame, caller's return address). + * p->hipe.narity MUST contain the arity (number of actuals). + * On exit, p->hipe.ncallee is set to the handler's PC and p->hipe.nsp + * is set to its SP (low address of its stack frame). 
+ */ +void hipe_find_handler(Process *p) +{ + Eterm *nsp; + Eterm *nsp_end; + unsigned long ra; + unsigned long exnra; + unsigned int arity; + const struct sdesc *sdesc; + + nsp = p->hipe.nsp; + nsp_end = p->hipe.nstend; + arity = p->hipe.narity - NR_ARG_REGS; + if ((int)arity < 0) + arity = 0; + + ra = (unsigned long)p->hipe.nra; + + while (nsp < nsp_end) { + nsp += arity; /* skip actuals */ + if (ra == (unsigned long)&nbif_stack_trap_ra) + ra = (unsigned long)p->hipe.ngra; + sdesc = hipe_find_sdesc(ra); + if ((exnra = sdesc_exnra(sdesc)) != 0 && + (p->catches >= 0 || + exnra == (unsigned long)&nbif_fail)) { + p->hipe.ncallee = (void(*)(void)) exnra; + p->hipe.nsp = nsp; + p->hipe.narity = 0; + /* update the gray/white boundary if we threw past it */ + if (p->hipe.nstgraylim && nsp >= p->hipe.nstgraylim) + hipe_update_stack_trap(p, sdesc); + return; + } + nsp += sdesc_fsize(sdesc); /* skip locals */ + arity = sdesc_arity(sdesc); + ra = *nsp++; /* fetch & skip saved ra */ + } + fprintf(stderr, "%s: no native CATCH found!\r\n", __FUNCTION__); + abort(); +} + +int hipe_fill_stacktrace(Process *p, int depth, Eterm **trace) +{ + Eterm *nsp; + Eterm *nsp_end; + unsigned long ra, prev_ra; + unsigned int arity; + const struct sdesc *sdesc; + int i; + + if (depth < 1) + return 0; + + nsp = p->hipe.nsp; + nsp_end = p->hipe.nstend; + arity = p->hipe.narity - NR_ARG_REGS; + if ((int)arity < 0) + arity = 0; + + ra = (unsigned long)p->hipe.nra; + prev_ra = 0; + i = 0; + for (;;) { + if (ra == (unsigned long)nbif_stack_trap_ra) + ra = (unsigned long)p->hipe.ngra; + if (ra != prev_ra) { + trace[i] = (Eterm*)ra; + ++i; + if (i == depth) + break; + prev_ra = ra; + } + if (nsp >= nsp_end) + break; + sdesc = hipe_find_sdesc(ra); + nsp += arity + sdesc_fsize(sdesc); + arity = sdesc_arity(sdesc); + ra = *nsp++; + } + return i; +} diff --git a/erts/emulator/hipe/hipe_signal.h b/erts/emulator/hipe/hipe_signal.h new file mode 100644 index 0000000000..3c3c844d52 --- /dev/null +++ 
b/erts/emulator/hipe/hipe_signal.h @@ -0,0 +1,39 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2002-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_signal.h + * + * Architecture-specific initialisation of Unix signals. + */ +#ifndef HIPE_SIGNAL_H +#define HIPE_SIGNAL_H + +#if defined(__i386__) || defined(__x86_64__) +extern void hipe_signal_init(void); +#else +static __inline__ void hipe_signal_init(void) { } +#endif + +#if defined(ERTS_SMP) && (defined(__i386__) || defined(__x86_64__)) +extern void hipe_thread_signal_init(void); +#else +static __inline__ void hipe_thread_signal_init(void) { } +#endif + +#endif /* HIPE_SIGNAL_H */ diff --git a/erts/emulator/hipe/hipe_sparc.c b/erts/emulator/hipe/hipe_sparc.c new file mode 100644 index 0000000000..661b42130a --- /dev/null +++ b/erts/emulator/hipe/hipe_sparc.c @@ -0,0 +1,243 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2003-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. 
+ * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#include <stddef.h> /* offsetof() */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "global.h" +#include <sys/mman.h> + +#include "hipe_arch.h" +#include "hipe_native_bif.h" /* nbif_callemu() */ + +/* Flush dcache and invalidate icache for a range of addresses. */ +void hipe_flush_icache_range(void *address, unsigned int nbytes) +{ + char *a = (char*)address; + int n = nbytes; + + while (n > 0) { + hipe_flush_icache_word(a); + a += 4; + n -= 4; + } +} + +static void patch_sethi(Uint32 *address, unsigned int imm22) +{ + unsigned int insn = *address; + *address = (insn & 0xFFC00000) | (imm22 & 0x003FFFFF); + hipe_flush_icache_word(address); +} + +static void patch_ori(Uint32 *address, unsigned int imm10) +{ + /* address points to an OR reg,imm,reg insn */ + unsigned int insn = *address; + *address = (insn & 0xFFFFE000) | (imm10 & 0x3FF); + hipe_flush_icache_word(address); +} + +static void patch_sethi_ori(Uint32 *address, Uint32 value) +{ + patch_sethi(address, value >> 10); + patch_ori(address+1, value); +} + +void hipe_patch_load_fe(Uint32 *address, Uint32 value) +{ + patch_sethi_ori(address, value); +} + +int hipe_patch_insn(void *address, Uint32 value, Eterm type) +{ + switch (type) { + case am_load_mfa: + case am_atom: + case am_constant: + case am_closure: + case am_c_const: + break; + default: + return -1; + } + patch_sethi_ori((Uint32*)address, value); + return 0; +} + +int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) +{ + Uint32 relDest, newI; + + if (trampoline) + return -1; + relDest = (Uint32)((Sint32)destAddress - (Sint32)callAddress); + newI = (1 << 30) | (relDest >> 2); + *(Uint32*)callAddress = newI; + 
hipe_flush_icache_word(callAddress); + return 0; +} + +/* + * Memory allocator for executable code. + * + * This is required on x86 because some combinations + * of Linux kernels and CPU generations default to + * non-executable memory mappings, causing ordinary + * malloc() memory to be non-executable. + */ +static unsigned int code_bytes; +static char *code_next; + +#if 0 /* change to non-zero to get allocation statistics at exit() */ +static unsigned int total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs, nr_large, total_lost; +static unsigned int atexit_done; + +static void alloc_code_stats(void) +{ + printf("\r\nalloc_code_stats: %u bytes mapped, %u joins, %u splits, %u bytes allocated, %u average alloc, %u large allocs, %u bytes lost\r\n", + total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs ? total_alloc/nr_allocs : 0, nr_large, total_lost); +} + +static void atexit_alloc_code_stats(void) +{ + if (!atexit_done) { + atexit_done = 1; + (void)atexit(alloc_code_stats); + } +} + +#define ALLOC_CODE_STATS(X) do{X;}while(0) +#else +#define ALLOC_CODE_STATS(X) do{}while(0) +#endif + +static void morecore(unsigned int alloc_bytes) +{ + unsigned int map_bytes; + char *map_hint, *map_start; + + /* Page-align the amount to allocate. */ + map_bytes = (alloc_bytes + 4095) & ~4095; + + /* Round up small allocations. */ + if (map_bytes < 1024*1024) + map_bytes = 1024*1024; + else + ALLOC_CODE_STATS(++nr_large); + + /* Create a new memory mapping, ensuring it is executable + and in the low 2GB of the address space. Also attempt + to make it adjacent to the previous mapping. 
*/ + map_hint = code_next + code_bytes; + if ((unsigned long)map_hint & 4095) + abort(); + map_start = mmap(map_hint, map_bytes, + PROT_EXEC|PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS +#ifdef __x86_64__ + |MAP_32BIT +#endif + , + -1, 0); + if (map_start == MAP_FAILED) { + perror("mmap"); + abort(); + } + ALLOC_CODE_STATS(total_mapped += map_bytes); + + /* Merge adjacent mappings, so the trailing portion of the previous + mapping isn't lost. In practice this is quite successful. */ + if (map_start == map_hint) { + ALLOC_CODE_STATS(++nr_joins); + code_bytes += map_bytes; + } else { + ALLOC_CODE_STATS(++nr_splits); + ALLOC_CODE_STATS(total_lost += code_bytes); + code_next = map_start; + code_bytes = map_bytes; + } + + ALLOC_CODE_STATS(atexit_alloc_code_stats()); +} + +static void *alloc_code(unsigned int alloc_bytes) +{ + void *res; + + /* Align function entries. */ + alloc_bytes = (alloc_bytes + 3) & ~3; + + if (code_bytes < alloc_bytes) + morecore(alloc_bytes); + ALLOC_CODE_STATS(++nr_allocs); + ALLOC_CODE_STATS(total_alloc += alloc_bytes); + res = code_next; + code_next += alloc_bytes; + code_bytes -= alloc_bytes; + return res; +} + +void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) +{ + if (is_not_nil(callees)) + return NULL; + *trampolines = NIL; + return alloc_code(nrbytes); +} + +/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2() + and hipe_bif0.c:hipe_make_stub() */ +void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity) +{ + unsigned int *code; + unsigned int callEmuOffset; + int i; + + code = alloc_code(5*sizeof(int)); + + /* sethi %hi(Address), %i4 */ + code[0] = 0x39000000 | (((unsigned int)beamAddress >> 10) & 0x3FFFFF); + /* or %g0, %o7, %i3 ! 
mov %o7, %i3 */ + code[1] = 0xB610000F; + /* or %i4, %lo(Address), %i4 */ + code[2] = 0xB8172000 | ((unsigned int)beamAddress & 0x3FF); + /* call callemu */ + callEmuOffset = (char*)nbif_callemu - (char*)&code[3]; + code[3] = (1 << 30) | ((callEmuOffset >> 2) & 0x3FFFFFFF); + /* or %g0, Arity, %i5 ! mov Arity, %i5 */ + code[4] = 0xBA102000 | (beamArity & 0x0FFF); + + /* flush I-cache as if by write_u32() */ + for (i = 0; i < 5; ++i) + hipe_flush_icache_word(&code[i]); + + return code; +} + +void hipe_arch_print_pcb(struct hipe_process_state *p) +{ +#define U(n,x) \ + printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "") + U("nra ", nra); + U("narity ", narity); +#undef U +} diff --git a/erts/emulator/hipe/hipe_sparc.h b/erts/emulator/hipe/hipe_sparc.h new file mode 100644 index 0000000000..53cb18ee45 --- /dev/null +++ b/erts/emulator/hipe/hipe_sparc.h @@ -0,0 +1,54 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2003-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_SPARC_H +#define HIPE_SPARC_H + +static __inline__ void hipe_flush_icache_word(void *address) +{ + asm volatile("flush %0" + : /* no outputs */ + : "r"(address) + : "memory"); +} + +extern void hipe_flush_icache_range(void *address, unsigned int nbytes); + +/* for stack descriptor hash lookup */ +#define HIPE_RA_LSR_COUNT 2 /* low 2 bits are always zero */ + +/* for hipe_bifs_{read,write}_{s,u}32 */ +static __inline__ int hipe_word32_address_ok(void *address) +{ + return ((unsigned long)address & 0x3) == 0; +} + +/* Native stack growth direction. */ +#define HIPE_NSTACK_GROWS_DOWN + +#define hipe_arch_name am_ultrasparc + +extern void hipe_sparc_inc_stack(void); + +/* for hipe_bifs_enter_code_2 */ +extern void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p); +#define HIPE_ALLOC_CODE(n,c,t,p) hipe_alloc_code((n),(c),(t),(p)) + +#endif /* HIPE_SPARC_H */ diff --git a/erts/emulator/hipe/hipe_sparc.tab b/erts/emulator/hipe/hipe_sparc.tab new file mode 100644 index 0000000000..f192e1f81c --- /dev/null +++ b/erts/emulator/hipe/hipe_sparc.tab @@ -0,0 +1,23 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2004-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. 
+# +# %CopyrightEnd% +# +# $Id$ +# SPARC-specific atoms + +atom inc_stack_0 +atom ultrasparc diff --git a/erts/emulator/hipe/hipe_sparc_abi.txt b/erts/emulator/hipe/hipe_sparc_abi.txt new file mode 100644 index 0000000000..d016a96c1c --- /dev/null +++ b/erts/emulator/hipe/hipe_sparc_abi.txt @@ -0,0 +1,78 @@ + + %CopyrightBegin% + %CopyrightEnd% + +$Id$ + +HiPE SPARC ABI +============== +This document describes aspects of HiPE's runtime system +that are specific for the SPARC architecture. + +Register Usage +-------------- +%g6, %g7, %o6 (%sp), and %i6 (%fp) are reserved for the C runtime system. + +%i0-%i2 are fixed (unallocatable). +%i0 (P) is the current process' "Process" pointer. +%i1 (NSP) is the current process' native stack pointer. +%i2 (HP) is the current process' heap pointer. + +%g1-%g5, %o0-%o5, %o7 (RA), %l0-%l7, %i3-%i5, and %i7 are caller-save. +They are used as temporary scratch registers and for function call +parameters and results. + +The runtime system uses temporaries in specific contexts: +%i5 (TEMP_ARG1) is used to pass the callee arity in native-to-BEAM traps. +%i4 (TEMP_ARG0) is used to preserve the return value in nbif_stack_trap_ra, +preserve RA in hipe_sparc_inc_stack (the caller saved its RA in +TEMP_RA), to pass the callee address in native-to-BEAM traps, +and to contain the target in BEAM-to-native calls. +%i3 (TEMP_RA) is used to preserve RA around BIF calls. +%o1 (ARG0) is used for MBUF-after-BIF checks, for storing the +arity of a BIF that throws an exception or does GC due to MBUF, +and for checking P->flags for pending timeout. +%o0 is used to inspect the type of a thrown exception, and to +return a result token from glue.S back to hipe_mode_switch(). + +Calling Convention +------------------ +The first NR_ARG_REGS parameters (a tunable parameter between 0 and 6, +inclusive) are passed in %o1-%o5 and %o0. + +%o0 is not used for parameter passing. 
This allows the BIF wrappers to +simply move P to %o0 without shifting the remaining parameter registers. + +%o7 (RA) contains the return address during function calls. + +The return value from a function is placed in %o0. + +Stack Frame Layout +Stack Descriptors +----------------- +Same as AMD64/ARM/PowerPC/x86. + +Standard SPARC Calling Conventions +================================== + +Reg Status Role +--- ------ ---- +%g0 reserved constant 0 +%g1-%g5 calleR-save volatile +%g6-%g7 reserved thread register? got? +%o0 calleR-save volatile, parameter, return value +%o1-%o5 calleR-save volatile, parameters +%o6 reserved stack pointer, 8-byte aligned +%o7 reserved return address +%l0-%l7 calleE-save local variables +%i0-%i5 calleE-save input parameters, local variables +%i6 calleE-save frame pointer (caller's stack pointer) +%i7 calleE-save input return address, local variable + +The stack grows from high to low addresses. +Excess parameters are stored on the stack, at %sp+92 and up. + +See also: +http://soldc.sun.com/articles/sparcv9abi.html +http://www.users.qwest.net/~eballen1/sparc.tech.links.html +http://compilers.iecc.com/comparch/article/93-12-073 diff --git a/erts/emulator/hipe/hipe_sparc_asm.m4 b/erts/emulator/hipe/hipe_sparc_asm.m4 new file mode 100644 index 0000000000..7a4403ac09 --- /dev/null +++ b/erts/emulator/hipe/hipe_sparc_asm.m4 @@ -0,0 +1,214 @@ +changecom(`/*', `*/')dnl +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2007-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * $Id$ + */ +`#ifndef HIPE_SPARC_ASM_H +#define HIPE_SPARC_ASM_H' + +/* + * Tunables. + */ +define(LEAF_WORDS,16)dnl number of stack words for leaf functions +define(NR_ARG_REGS,4)dnl admissible values are 0 to 6, inclusive + +`#define SPARC_LEAF_WORDS 'LEAF_WORDS + +/* + * Reserved registers. + */ +`#define RA %o7' +`#define P %i0' +`#define NSP %i1' +`#define HP %i2' +`#define TEMP_RA %i3' + +/* + * Context switching macros. + * + * RESTORE_CONTEXT and RESTORE_CONTEXT_QUICK do not affect + * the condition register. + */ +`#define SAVE_CONTEXT_QUICK \ + mov RA, TEMP_RA' + +`#define RESTORE_CONTEXT_QUICK \ + mov TEMP_RA, RA' + +`#define SAVE_CACHED_STATE \ + st HP, [P+P_HP]; \ + st NSP, [P+P_NSP]' + +`#define RESTORE_CACHED_STATE \ + ld [P+P_HP], HP; \ + ld [P+P_NSP], NSP' + +`#define SAVE_CONTEXT_BIF \ + mov RA, TEMP_RA; \ + st HP, [P+P_HP]' + +`#define RESTORE_CONTEXT_BIF \ + mov TEMP_RA, RA; /* XXX unnecessary */\ + ld [P+P_HP], HP' + +`#define SAVE_CONTEXT_GC \ + mov RA, TEMP_RA; \ + st RA, [P+P_NRA]; \ + st NSP, [P+P_NSP]; \ + st HP, [P+P_HP]' + +`#define RESTORE_CONTEXT_GC \ + mov TEMP_RA, RA; /* XXX unnecessary */\ + ld [P+P_HP], HP' + +/* + * Argument (parameter) registers. + */ +`#define SPARC_NR_ARG_REGS 'NR_ARG_REGS +`#define NR_ARG_REGS 'NR_ARG_REGS + +define(defarg,`define(ARG$1,`$2')dnl +#`define ARG'$1 $2' +)dnl + +ifelse(eval(NR_ARG_REGS >= 1),0,, +`defarg(0,`%o1')')dnl +ifelse(eval(NR_ARG_REGS >= 2),0,, +`defarg(1,`%o2')')dnl +ifelse(eval(NR_ARG_REGS >= 3),0,, +`defarg(2,`%o3')')dnl +ifelse(eval(NR_ARG_REGS >= 4),0,, +`defarg(3,`%o4')')dnl +ifelse(eval(NR_ARG_REGS >= 5),0,, +`defarg(4,`%o5')')dnl +ifelse(eval(NR_ARG_REGS >= 6),0,, +`defarg(5,`%o0')')dnl + +/* + * TEMP_ARG0: + * Used in nbif_stack_trap_ra to preserve the return value. + * Must be a C callee-save register. 
+ * Must be otherwise unused in the return path. + * + * TEMP_ARG0: + * Used in hipe_sparc_inc_stack to preserve the return address + * (TEMP_RA contains the caller's saved return address). + * Must be a C callee-save register. + * Must be otherwise unused in the call path. + * + * TEMP_ARG0: + * Used to pass the callee address in native-to-BEAM traps + * (nbif_callemu). + * Must be otherwise unused in the call path. + * + * TEMP_ARG1: + * Used to pass the callee arity in native-to-BEAM traps + * (nbif_callemu). + * Must be otherwise unused in the call path. + */ +`#define TEMP_ARG0 %i4' +`#define TEMP_ARG1 %i5' + +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +dnl X X +dnl X hipe_sparc_glue.S support X +dnl X X +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + +dnl +dnl LOAD_ARG_REGS +dnl +define(LAR_1,`ld [P+P_ARG$1], ARG$1 ; ')dnl +define(LAR_N,`ifelse(eval($1 >= 0),0,,`LAR_N(eval($1-1))LAR_1($1)')')dnl +define(LOAD_ARG_REGS,`LAR_N(eval(NR_ARG_REGS-1))')dnl +`#define LOAD_ARG_REGS 'LOAD_ARG_REGS + +dnl +dnl STORE_ARG_REGS +dnl +define(SAR_1,`st ARG$1, [P+P_ARG$1] ; ')dnl +define(SAR_N,`ifelse(eval($1 >= 0),0,,`SAR_N(eval($1-1))SAR_1($1)')')dnl +define(STORE_ARG_REGS,`SAR_N(eval(NR_ARG_REGS-1))')dnl +`#define STORE_ARG_REGS 'STORE_ARG_REGS + +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +dnl X X +dnl X hipe_sparc_bifs.m4 support X +dnl X X +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + +dnl +dnl NBIF_ARG(DST,ARITY,ARGNO) +dnl Access a formal parameter. +dnl It will be a memory load via NSP when ARGNO >= NR_ARG_REGS. +dnl It will be a register move when 0 <= ARGNO < NR_ARG_REGS; if +dnl the source and destination are the same, the move is suppressed. +dnl +define(NBIF_MOVE_REG,`ifelse($1,$2,`! 
mov $2, $1',`mov $2, $1')')dnl +define(NBIF_REG_ARG,`NBIF_MOVE_REG($1,ARG$2)')dnl +define(NBIF_STK_LOAD,`ld [NSP+$2], $1')dnl +define(NBIF_STK_ARG,`NBIF_STK_LOAD($1,eval(4*(($2-$3)-1)))')dnl +define(NBIF_ARG,`ifelse(eval($3 >= NR_ARG_REGS),0,`NBIF_REG_ARG($1,$3)',`NBIF_STK_ARG($1,$2,$3)')')dnl +`/* #define NBIF_ARG_1_0 'NBIF_ARG(r1,1,0)` */' +`/* #define NBIF_ARG_2_0 'NBIF_ARG(r1,2,0)` */' +`/* #define NBIF_ARG_2_1 'NBIF_ARG(r2,2,1)` */' +`/* #define NBIF_ARG_3_0 'NBIF_ARG(r1,3,0)` */' +`/* #define NBIF_ARG_3_1 'NBIF_ARG(r2,3,1)` */' +`/* #define NBIF_ARG_3_2 'NBIF_ARG(r3,3,2)` */' +`/* #define NBIF_ARG_5_0 'NBIF_ARG(r1,5,0)` */' +`/* #define NBIF_ARG_5_1 'NBIF_ARG(r2,5,1)` */' +`/* #define NBIF_ARG_5_2 'NBIF_ARG(r3,5,2)` */' +`/* #define NBIF_ARG_5_3 'NBIF_ARG(r4,5,3)` */' +`/* #define NBIF_ARG_5_4 'NBIF_ARG(r5,5,4)` */' + +dnl +dnl NBIF_RET(ARITY) +dnl Generates a return from a native BIF, taking care to pop +dnl any stacked formal parameters. +dnl May only be used in BIF/primop wrappers where SAVE_CONTEXT +dnl has saved RA in TEMP_RA. +dnl +define(NSP_RETN,`jmpl TEMP_RA+8, %g0 + add NSP, $1, NSP')dnl +define(NSP_RET0,`jmpl TEMP_RA+8, %g0 + nop')dnl +define(RET_POP,`ifelse(eval($1 > NR_ARG_REGS),0,0,eval(4*($1 - NR_ARG_REGS)))')dnl +define(NBIF_RET_N,`ifelse(eval($1),0,`NSP_RET0',`NSP_RETN($1)')')dnl +define(NBIF_RET,`NBIF_RET_N(eval(RET_POP($1)))')dnl +`/* #define NBIF_RET_0 'NBIF_RET(0)` */' +`/* #define NBIF_RET_1 'NBIF_RET(1)` */' +`/* #define NBIF_RET_2 'NBIF_RET(2)` */' +`/* #define NBIF_RET_3 'NBIF_RET(3)` */' +`/* #define NBIF_RET_5 'NBIF_RET(5)` */' + +dnl +dnl QUICK_CALL_RET(CFUN,ARITY) +dnl Used in nocons_nofail and noproc primop interfaces to optimise +dnl SAVE_CONTEXT_QUICK; call CFUN; nop; RESTORE_CONTEXT_QUICK; NBIF_RET(ARITY). 
+dnl +define(NBIF_POP_N,`ifelse(eval($1),0,`nop',`add NSP, $1, NSP')')dnl +define(QUICK_CALL_RET,`ba $1; NBIF_POP_N(eval(RET_POP($2)))')dnl +`/* #define QUICK_CALL_RET_F_0 'QUICK_CALL_RET(F,0)` */' +`/* #define QUICK_CALL_RET_F_1 'QUICK_CALL_RET(F,1)` */' +`/* #define QUICK_CALL_RET_F_2 'QUICK_CALL_RET(F,2)` */' +`/* #define QUICK_CALL_RET_F_3 'QUICK_CALL_RET(F,3)` */' +`/* #define QUICK_CALL_RET_F_5 'QUICK_CALL_RET(F,5)` */' + +`#endif /* HIPE_SPARC_ASM_H */' diff --git a/erts/emulator/hipe/hipe_sparc_bifs.m4 b/erts/emulator/hipe/hipe_sparc_bifs.m4 new file mode 100644 index 0000000000..f3753b3847 --- /dev/null +++ b/erts/emulator/hipe/hipe_sparc_bifs.m4 @@ -0,0 +1,578 @@ +changecom(`/*', `*/')dnl +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * $Id$ + */ + +include(`hipe/hipe_sparc_asm.m4') +#`include' "hipe_literals.h" + + .section ".text" + .align 4 + +/* + * Test for exception. This macro executes its delay slot. 
+ */ +`#define __TEST_GOT_EXN(LABEL) cmp %o0, THE_NON_VALUE; bz,pn %icc, LABEL +#define TEST_GOT_EXN(ARITY) __TEST_GOT_EXN(JOIN3(nbif_,ARITY,_simple_exception))' + +`#define TEST_GOT_MBUF ld [P+P_MBUF], %o1; cmp %o1, 0; bne 3f; nop; 2: +#define JOIN3(A,B,C) A##B##C +#define HANDLE_GOT_MBUF(ARITY) 3: call JOIN3(nbif_,ARITY,_gc_after_bif); nop; b 2b; nop' + +/* + * standard_bif_interface_1(nbif_name, cbif_name) + * standard_bif_interface_2(nbif_name, cbif_name) + * standard_bif_interface_3(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 1-3 parameters and + * standard failure mode. + */ +define(standard_bif_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + TEST_GOT_EXN(1) + RESTORE_CONTEXT_BIF + NBIF_RET(1) + HANDLE_GOT_MBUF(1) + .size $1, .-$1 + .type $1, #function +#endif') + +define(standard_bif_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,2,0) + NBIF_ARG(%o2,2,1) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + TEST_GOT_EXN(2) + RESTORE_CONTEXT_BIF + NBIF_RET(2) + HANDLE_GOT_MBUF(2) + .size $1, .-$1 + .type $1, #function +#endif') + +define(standard_bif_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,3,0) + NBIF_ARG(%o2,3,1) + NBIF_ARG(%o3,3,2) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. Check for exception. 
*/ + TEST_GOT_EXN(3) + RESTORE_CONTEXT_BIF + NBIF_RET(3) + HANDLE_GOT_MBUF(3) + .size $1, .-$1 + .type $1, #function +#endif') + +/* + * fail_bif_interface_0(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 0 parameters and + * standard failure mode. + */ +define(fail_bif_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + TEST_GOT_EXN(0) + RESTORE_CONTEXT_BIF + NBIF_RET(0) + HANDLE_GOT_MBUF(0) + .size $1, .-$1 + .type $1, #function +#endif') + +/* + * gc_bif_interface_0(nbif_name, cbif_name) + * gc_bif_interface_1(nbif_name, cbif_name) + * gc_bif_interface_2(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 0-2 parameters and + * standard failure mode. + * The BIF may do a GC. + */ +define(gc_bif_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. */ + RESTORE_CONTEXT_GC + NBIF_RET(0) + HANDLE_GOT_MBUF(0) + .size $1, .-$1 + .type $1, #function +#endif') + +define(gc_bif_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + TEST_GOT_EXN(1) + RESTORE_CONTEXT_GC + NBIF_RET(1) + HANDLE_GOT_MBUF(1) + .size $1, .-$1 + .type $1, #function +#endif') + +define(gc_bif_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. 
*/ + mov P, %o0 + NBIF_ARG(%o1,2,0) + NBIF_ARG(%o2,2,1) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. Check for exception. */ + TEST_GOT_EXN(2) + RESTORE_CONTEXT_GC + NBIF_RET(2) + HANDLE_GOT_MBUF(2) + .size $1, .-$1 + .type $1, #function +#endif') + +/* + * gc_nofail_primop_interface_1(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 1 ordinary parameter and no failure mode. + * The primop may do a GC. + */ +define(gc_nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_GC + call $2 + nop + + /* Restore register. */ + RESTORE_CONTEXT_GC + NBIF_RET(1) + .size $1, .-$1 + .type $1, #function +#endif') + +/* + * nofail_primop_interface_0(nbif_name, cbif_name) + * nofail_primop_interface_1(nbif_name, cbif_name) + * nofail_primop_interface_2(nbif_name, cbif_name) + * nofail_primop_interface_3(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 0-3 ordinary parameters and no failure mode. + * Also used for guard BIFs. + */ +define(nofail_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. */ + RESTORE_CONTEXT_BIF + NBIF_RET(0) + HANDLE_GOT_MBUF(0) + .size $1, .-$1 + .type $1, #function +#endif') + +define(nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,1,0) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. 
*/ + RESTORE_CONTEXT_BIF + NBIF_RET(1) + HANDLE_GOT_MBUF(1) + .size $1, .-$1 + .type $1, #function +#endif') + +define(nofail_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,2,0) + NBIF_ARG(%o2,2,1) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. */ + RESTORE_CONTEXT_BIF + NBIF_RET(2) + HANDLE_GOT_MBUF(2) + .size $1, .-$1 + .type $1, #function +#endif') + +define(nofail_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,3,0) + NBIF_ARG(%o2,3,1) + NBIF_ARG(%o3,3,2) + + /* Save caller-save registers and call the C function. */ + SAVE_CONTEXT_BIF + call $2 + nop + TEST_GOT_MBUF + + /* Restore registers. */ + RESTORE_CONTEXT_BIF + NBIF_RET(3) + HANDLE_GOT_MBUF(3) + .size $1, .-$1 + .type $1, #function +#endif') + +/* + * nocons_nofail_primop_interface_0(nbif_name, cbif_name) + * nocons_nofail_primop_interface_1(nbif_name, cbif_name) + * nocons_nofail_primop_interface_2(nbif_name, cbif_name) + * nocons_nofail_primop_interface_3(nbif_name, cbif_name) + * nocons_nofail_primop_interface_5(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 0-3 or 5 ordinary parameters, and no failure mode. + * The primop cannot CONS or gc. + */ +define(nocons_nofail_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,0) + nop + .size $1, .-$1 + .type $1, #function +#endif') + +define(nocons_nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,1,0) + + /* Perform a quick save;call;restore;ret sequence. 
*/ + QUICK_CALL_RET($2,1) + nop + .size $1, .-$1 + .type $1, #function +#endif') + +define(nocons_nofail_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,2,0) + NBIF_ARG(%o2,2,1) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,2) + nop + .size $1, .-$1 + .type $1, #function +#endif') + +define(nocons_nofail_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,3,0) + NBIF_ARG(%o2,3,1) + NBIF_ARG(%o3,3,2) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,3) + nop + .size $1, .-$1 + .type $1, #function +#endif') + +define(nocons_nofail_primop_interface_5, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + mov P, %o0 + NBIF_ARG(%o1,5,0) + NBIF_ARG(%o2,5,1) + NBIF_ARG(%o3,5,2) + NBIF_ARG(%o4,5,3) + NBIF_ARG(%o5,5,4) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,5) + nop + .size $1, .-$1 + .type $1, #function +#endif') + +/* + * noproc_primop_interface_0(nbif_name, cbif_name) + * noproc_primop_interface_1(nbif_name, cbif_name) + * noproc_primop_interface_2(nbif_name, cbif_name) + * noproc_primop_interface_3(nbif_name, cbif_name) + * noproc_primop_interface_5(nbif_name, cbif_name) + * + * Generate native interface for a primop with no implicit P + * parameter, 0-3 or 5 ordinary parameters, and no failure mode. + * The primop cannot CONS or gc. + */ +define(noproc_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* XXX: this case is always trivial; how to suppress the branch? */ + /* Perform a quick save;call;restore;ret sequence. 
*/ + QUICK_CALL_RET($2,0) + nop + .size $1, .-$1 + .type $1, #function +#endif') + +define(noproc_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + NBIF_ARG(%o0,1,0) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,1) + nop + .size $1, .-$1 + .type $1, #function +#endif') + +define(noproc_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + NBIF_ARG(%o0,2,0) + NBIF_ARG(%o1,2,1) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,2) + nop + .size $1, .-$1 + .type $1, #function +#endif') + +define(noproc_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + NBIF_ARG(%o0,3,0) + NBIF_ARG(%o1,3,1) + NBIF_ARG(%o2,3,2) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,3) + nop + .size $1, .-$1 + .type $1, #function +#endif') + +define(noproc_primop_interface_5, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + .global $1 +$1: + /* Set up C argument registers. */ + NBIF_ARG(%o0,5,0) + NBIF_ARG(%o1,5,1) + NBIF_ARG(%o2,5,2) + NBIF_ARG(%o3,5,3) + NBIF_ARG(%o4,5,4) + + /* Perform a quick save;call;restore;ret sequence. */ + QUICK_CALL_RET($2,5) + nop + .size $1, .-$1 + .type $1, #function +#endif') + +include(`hipe/hipe_bif_list.m4') + +`#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif' diff --git a/erts/emulator/hipe/hipe_sparc_gc.h b/erts/emulator/hipe/hipe_sparc_gc.h new file mode 100644 index 0000000000..9035f5baee --- /dev/null +++ b/erts/emulator/hipe/hipe_sparc_gc.h @@ -0,0 +1,29 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. 
You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + * Stack walking helpers for native stack GC procedures. + * SPARC version. + */ +#ifndef HIPE_SPARC_GC_H +#define HIPE_SPARC_GC_H + +#include "hipe_sparc_asm.h" /* for NR_ARG_REGS */ +#include "hipe_risc_gc.h" + +#endif /* HIPE_SPARC_GC_H */ diff --git a/erts/emulator/hipe/hipe_sparc_glue.S b/erts/emulator/hipe/hipe_sparc_glue.S new file mode 100644 index 0000000000..d1af5c43f5 --- /dev/null +++ b/erts/emulator/hipe/hipe_sparc_glue.S @@ -0,0 +1,448 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#include "hipe_sparc_asm.h" +#include "hipe_literals.h" +#define ASM +#include "hipe_mode_switch.h" + + .section ".text" + .align 4 + +/* + * Enter Erlang from C. + * Switch to a new register window. + * Create a new frame on the C stack. + * Save C return address in the frame. + * Retrieve the process pointer from the C argument registers. 
+ */ +#define ENTER_FROM_C \ + save %sp, -112, %sp; \ + st %i7, [%sp+96] + +/* + * Return to the calling C function. + * The return value is in %o0. + * + * .flush_exit saves NSP and other cached P state. + * .suspend_exit also saves RA. + */ +.suspend_exit: + /* save RA, so we can be resumed */ + st RA, [P+P_NRA] +.flush_exit: + /* restore C return address (hoisted to avoid stall) */ + ld [%sp+96], %i7 + /* flush cached P state */ + SAVE_CACHED_STATE + /* restore callee-save registers, drop frame, return */ + jmp %i7+8 /* ret */ + restore %g0, %o0, %o0 /* kills P, moves our %o0 to caller's %o0 */ + +/* + * int hipe_sparc_call_to_native(Process *p); + * Emulated code recursively calls native code. + */ + .global hipe_sparc_call_to_native + .type hipe_sparc_call_to_native, #function + .proc 04 /* ??? */ +hipe_sparc_call_to_native: + ENTER_FROM_C + /* prepare to call the target */ + ld [P+P_NCALLEE], TEMP_ARG0 + /* get argument registers */ + LOAD_ARG_REGS + /* cache some P state in registers */ + RESTORE_CACHED_STATE +/* FALLTHROUGH + * + * We export this return address so that hipe_mode_switch() can discover + * when native code tailcalls emulated code. + * Note: this is SPARC, so the value in the return address register + * is the address of the call/jmpl instruction itself. + */ + .global nbif_return +nbif_return: + /* call the target */ + jmpl TEMP_ARG0, RA + nop +/* FALLTHROUGH + * + * This is where native code returns to emulated code. + */ + st %o0, [P+P_ARG0] /* save retval */ + ba .flush_exit + mov HIPE_MODE_SWITCH_RES_RETURN, %o0 + +/* + * int hipe_sparc_return_to_native(Process *p); + * Emulated code returns to its native code caller. + */ + .global hipe_sparc_return_to_native + .type hipe_sparc_return_to_native, #function + .proc 04 /* ??? */ +hipe_sparc_return_to_native: + ENTER_FROM_C + /* restore return address */ + ld [P+P_NRA], RA + /* cache some P state in registers */ + RESTORE_CACHED_STATE + /* + * Return using the current return address. 
+ * The parameters were popped at the original native-to-emulated + * call (hipe_call_from_native_is_recursive), so a plain ret suffices. + */ + jmp RA+8 + ld [P+P_ARG0], %o0 /* delay slot: get return value */ + +/* + * int hipe_sparc_tailcall_to_native(Process *); + * Emulated code tailcalls native code. + */ + .global hipe_sparc_tailcall_to_native + .type hipe_sparc_tailcall_to_native, #function + .proc 04 /* ??? */ +hipe_sparc_tailcall_to_native: + ENTER_FROM_C + /* prepare to call the target */ + ld [P+P_NCALLEE], TEMP_ARG0 + /* get argument registers */ + LOAD_ARG_REGS + /* cache some P state in registers */ + RESTORE_CACHED_STATE + /* call the target */ + jmp TEMP_ARG0 + ld [P+P_NRA], RA /* delay slot: restore return address */ + +/* + * int hipe_sparc_throw_to_native(Process *p); + * Emulated code throws an exception to its native code caller. + */ + .align 4 + .global hipe_sparc_throw_to_native + .type hipe_sparc_throw_to_native, #function + .proc 04 /* ??? */ +hipe_sparc_throw_to_native: + ENTER_FROM_C + /* prepare to invoke handler */ + ld [P+P_NCALLEE], TEMP_ARG0 /* set by hipe_find_handler() */ + /* cache some P state in registers */ + RESTORE_CACHED_STATE + /* invoke the handler */ + jmp TEMP_ARG0 + nop + +/* + * Native code calls emulated code via a stub + * which should look as follows: + * + * stub for f/N: + * sethi %hi(f's BEAM code address), TEMP_ARG0 + * mov RA, TEMP_RA ! because the call below clobbers RA (%o7) + * or TEMP_ARG0, %lo(f's BEAM code address), TEMP_ARG0 + * call nbif_callemu ! clobbers RA! + * mov N, TEMP_ARG1 ! delay slot: TEMP_ARG1 := ARITY + * + * XXX. Different stubs for different number of register parameters? 
+ */ + .global nbif_callemu +nbif_callemu: + st TEMP_ARG0, [P+P_BEAM_IP] + st TEMP_ARG1, [P+P_ARITY] + st TEMP_RA, [P+P_NRA] + STORE_ARG_REGS + ba .flush_exit + mov HIPE_MODE_SWITCH_RES_CALL, %o0 + +/* + * nbif_apply + */ + .global nbif_apply +nbif_apply: + STORE_ARG_REGS + ba .suspend_exit + mov HIPE_MODE_SWITCH_RES_APPLY, %o0 + +/* + * Native code calls an emulated-mode closure via a stub defined below. + * + * The closure is appended as the last actual parameter, and parameters + * beyond the first few passed in registers are pushed onto the stack in + * left-to-right order. + * Hence, the location of the closure parameter only depends on the number + * of parameters in registers, not the total number of parameters. + */ +#if NR_ARG_REGS >= 6 + .global nbif_ccallemu6 +nbif_ccallemu6: + st ARG5, [P+P_ARG5] +#if NR_ARG_REGS > 6 + mov ARG6, ARG5 +#else + ld [NSP+0], ARG5 +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 5 + .global nbif_ccallemu5 +nbif_ccallemu5: + st ARG4, [P+P_ARG4] +#if NR_ARG_REGS > 5 + mov ARG5, ARG4 +#else + ld [NSP+0], ARG4 +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 4 + .global nbif_ccallemu4 +nbif_ccallemu4: + st ARG3, [P+P_ARG3] +#if NR_ARG_REGS > 4 + mov ARG4, ARG3 +#else + ld [NSP+0], ARG3 +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 3 + .global nbif_ccallemu3 +nbif_ccallemu3: + st ARG2, [P+P_ARG2] +#if NR_ARG_REGS > 3 + mov ARG3, ARG2 +#else + ld [NSP+0], ARG2 +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 2 + .global nbif_ccallemu2 +nbif_ccallemu2: + st ARG1, [P+P_ARG1] +#if NR_ARG_REGS > 2 + mov ARG2, ARG1 +#else + ld [NSP+0], ARG1 +#endif + /*FALLTHROUGH*/ +#endif + +#if NR_ARG_REGS >= 1 + .global nbif_ccallemu1 +nbif_ccallemu1: + st ARG0, [P+P_ARG0] +#if NR_ARG_REGS > 1 + mov ARG1, ARG0 +#else + ld [NSP+0], ARG0 +#endif + /*FALLTHROUGH*/ +#endif + + .global nbif_ccallemu0 +nbif_ccallemu0: + /* We use %o1 not ARG0 here because ARG0 is not + defined when NR_ARG_REGS == 0. 
*/ +#if NR_ARG_REGS == 0 + ld [NSP+0], %o1 /* get the closure */ +#endif + st %o1, [P+P_CLOSURE] /* save the closure */ + ba .suspend_exit + mov HIPE_MODE_SWITCH_RES_CALL_CLOSURE, %o0 + +/* + * This is where native code suspends. + */ + .global nbif_suspend_0 +nbif_suspend_0: + ba .suspend_exit + mov HIPE_MODE_SWITCH_RES_SUSPEND, %o0 + +/* + * Suspend from a receive (waiting for a message) + */ + .global nbif_suspend_msg +nbif_suspend_msg: + ba .suspend_exit + mov HIPE_MODE_SWITCH_RES_WAIT, %o0 + +/* + * Suspend from a receive with a timeout (waiting for a message) + * if (!(p->flags & F_TIMO)) { suspend } + * else { return 0; } + */ + .global nbif_suspend_msg_timeout +nbif_suspend_msg_timeout: + ld [P+P_FLAGS], %o1 + /* this relies on F_TIMO (1<<2) fitting in a simm13 */ + andcc %o1, F_TIMO, %g0 + bz,a .suspend_exit + mov HIPE_MODE_SWITCH_RES_WAIT_TIMEOUT, %o0 /* delay slot */ + /* timeout has occurred */ + jmp RA+8 + mov 0, %o0 + +/* + * This is the default exception handler for native code. + */ + .global nbif_fail +nbif_fail: + ba .flush_exit + mov HIPE_MODE_SWITCH_RES_THROW, %o0 + + .global nbif_0_gc_after_bif + .global nbif_1_gc_after_bif + .global nbif_2_gc_after_bif + .global nbif_3_gc_after_bif +nbif_0_gc_after_bif: + ba .gc_after_bif + mov 0, %o1 /* delay slot */ +nbif_1_gc_after_bif: + ba .gc_after_bif + mov 1, %o1 /* delay slot */ +nbif_2_gc_after_bif: + ba .gc_after_bif + mov 2, %o1 /* delay slot */ +nbif_3_gc_after_bif: + mov 3, %o1 + /*FALLTHROUGH*/ +.gc_after_bif: + st %o1, [P+P_NARITY] + st TEMP_RA, [P+P_NRA] + st NSP, [P+P_NSP] + mov RA, TEMP_RA + mov %o0, %o1 + call erts_gc_after_bif_call + mov P, %o0 /* delay slot */ + mov TEMP_RA, RA + ld [P+P_NRA], TEMP_RA + jmp RA+8 + st %g0, [P+P_NARITY] /* delay slot */ + +/* + * We end up here when a BIF called from native signals an + * exceptional condition. + * HP has not been read from P. + * NSP has not been saved in P.
+ * TEMP_RA contains a copy of RA + */ + .global nbif_0_simple_exception +nbif_0_simple_exception: + ba .nbif_simple_exception + mov 0, %o1 /* delay slot */ + .global nbif_1_simple_exception +nbif_1_simple_exception: + ba .nbif_simple_exception + mov 1, %o1 /* delay slot */ + .global nbif_2_simple_exception +nbif_2_simple_exception: + ba .nbif_simple_exception + mov 2, %o1 /* delay slot */ + .global nbif_3_simple_exception +nbif_3_simple_exception: + mov 3, %o1 + /*FALLTHROUGH*/ +.nbif_simple_exception: + ld [P+P_FREASON], %o0 + cmp %o0, FREASON_TRAP + beq .handle_trap + nop + /* + * Find and invoke catch handler (it must exist). + * HP has not been read from P. + * NSP has not been saved in P. + * TEMP_RA should contain the current call's return address. + * %o1 should contain the current call's arity. + */ + st NSP, [P+P_NSP] + st TEMP_RA, [P+P_NRA] + st %o1, [P+P_NARITY] + /* find and prepare to invoke the handler */ + call hipe_handle_exception /* Note: hipe_handle_exception() conses */ + mov P, %o0 /* delay slot */ + /* prepare to invoke the handler */ + ld [P+P_NCALLEE], %o0 /* set by hipe_find_handler() */ + RESTORE_CACHED_STATE + /* now invoke the handler */ + jmp %o0 + nop + + /* + * A BIF failed with freason TRAP: + * - the BIF's arity is in %o1 + * - the native RA was saved in TEMP_RA before the BIF call + * - HP has not been read from P + * - NSP has not been saved in P + */ +.handle_trap: + mov HIPE_MODE_SWITCH_RES_TRAP, %o0 +.bif_exit: + /* restore C return address (hoisted to avoid stall) */ + ld [%sp+96], %i7 + st NSP, [P+P_NSP] + st %o1, [P+P_NARITY] + st TEMP_RA, [P+P_NRA] + jmp %i7+8 + restore %g0, %o0, %o0 + +/* + * nbif_stack_trap_ra: trap return address for maintaining + * the gray/white stack boundary + */ + .global nbif_stack_trap_ra +nbif_stack_trap_ra: /* a return address, not a function */ + nop /* ditto */ + nop /* ditto */ + /* This only handles a single return value. + If we have more, we need to save them in the PCB.
*/ + mov %o0, TEMP_ARG0 /* save retval */ + st NSP, [P+P_NSP] + call hipe_handle_stack_trap /* must not cons */ + mov P, %o0 /* delay slot */ + mov %o0, RA /* original RA */ + jmp RA+8 /* resume at original RA */ + mov TEMP_ARG0, %o0 /* delay slot: restore retval */ + +/* + * hipe_sparc_inc_stack + * Caller saved its RA in TEMP_RA (== TEMP1) before calling us. + */ + .global hipe_sparc_inc_stack +hipe_sparc_inc_stack: + STORE_ARG_REGS + mov RA, TEMP_ARG0 + st NSP, [P+P_NSP] + /* hipe_inc_nstack reads and writes NSP and NSP_LIMIT, + but does not access LR/RA, HP, or FCALLS. */ + call hipe_inc_nstack + mov P, %o0 /* delay slot */ + LOAD_ARG_REGS + /* this relies on LOAD_ARG_REGS not clobbering TEMP_ARG0 */ + jmp TEMP_ARG0+8 + ld [P+P_NSP], NSP /* delay slot */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/erts/emulator/hipe/hipe_sparc_glue.h b/erts/emulator/hipe/hipe_sparc_glue.h new file mode 100644 index 0000000000..3f881d2140 --- /dev/null +++ b/erts/emulator/hipe/hipe_sparc_glue.h @@ -0,0 +1,32 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_SPARC_GLUE_H +#define HIPE_SPARC_GLUE_H + +#include "hipe_sparc_asm.h" /* for NR_ARG_REGS, SPARC_LEAF_WORDS */ +#define NR_LEAF_WORDS SPARC_LEAF_WORDS +#define HIPE_ARCH_CALL_TO_NATIVE hipe_sparc_call_to_native +#define HIPE_ARCH_RETURN_TO_NATIVE hipe_sparc_return_to_native +#define HIPE_ARCH_TAILCALL_TO_NATIVE hipe_sparc_tailcall_to_native +#define HIPE_ARCH_THROW_TO_NATIVE hipe_sparc_throw_to_native +#include "hipe_risc_glue.h" + +#endif /* HIPE_SPARC_GLUE_H */ diff --git a/erts/emulator/hipe/hipe_sparc_primops.h b/erts/emulator/hipe/hipe_sparc_primops.h new file mode 100644 index 0000000000..1fbb261c67 --- /dev/null +++ b/erts/emulator/hipe/hipe_sparc_primops.h @@ -0,0 +1,21 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +PRIMOP_LIST(am_inc_stack_0, &hipe_sparc_inc_stack) diff --git a/erts/emulator/hipe/hipe_stack.c b/erts/emulator/hipe/hipe_stack.c new file mode 100644 index 0000000000..82f7f022b6 --- /dev/null +++ b/erts/emulator/hipe/hipe_stack.c @@ -0,0 +1,187 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2003-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. 
You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "global.h" + +#include "hipe_stack.h" + +/* + * Native-code stack descriptor hash table. + * + * This uses a specialised version of BEAM's hash table code: + * - Hash table size is always a power of two. + * Permits replacing an expensive integer division operation + * with a cheap bitwise 'and' in the hash index calculation + * - Lookups assume the key is in the table. + * Permits removing NULL checks. + * - Switched order of the hash bucket next and hvalue fields. + * The hvalue field, which must always be checked, gets a zero + * structure offset, which is faster on some architectures; + * the next field is only referenced if hvalue didn't match. + * These changes yield a much more efficient lookup operation. 
+ */ +struct hipe_sdesc_table hipe_sdesc_table; + +/* Allocate an array of size bucket pointers and clear it with sys_memzero(). */ +static struct sdesc **alloc_bucket(unsigned int size) +{ + unsigned long nbytes = size * sizeof(struct sdesc*); + struct sdesc **bucket = erts_alloc(ERTS_ALC_T_HIPE, nbytes); + sys_memzero(bucket, nbytes); + return bucket; +} + +/* + * Double the number of buckets, relink every descriptor from the + * old bucket array into the new one, and free the old array. + */ +static void hipe_grow_sdesc_table(void) +{ + unsigned int old_size, new_size, new_mask; + struct sdesc **old_bucket, **new_bucket; + unsigned int i; + + old_size = 1 << hipe_sdesc_table.log2size; + hipe_sdesc_table.log2size += 1; + new_size = 1 << hipe_sdesc_table.log2size; + new_mask = new_size - 1; + hipe_sdesc_table.mask = new_mask; + old_bucket = hipe_sdesc_table.bucket; + new_bucket = alloc_bucket(new_size); + hipe_sdesc_table.bucket = new_bucket; + for (i = 0; i < old_size; ++i) { + struct sdesc *b = old_bucket[i]; + while (b != NULL) { + struct sdesc *next = b->bucket.next; + /* recompute the index with the new mask and push b onto the front of that chain */ + unsigned int j = (b->bucket.hvalue >> HIPE_RA_LSR_COUNT) & new_mask; + b->bucket.next = new_bucket[j]; + new_bucket[j] = b; + b = next; + } + } + erts_free(ERTS_ALC_T_HIPE, old_bucket); +} + +/* + * Insert sdesc into the table, keyed on sdesc->bucket.hvalue. + * If an entry with the same key is already present, that entry + * is returned instead. + */ +struct sdesc *hipe_put_sdesc(struct sdesc *sdesc) +{ + unsigned long ra; + unsigned int i; + struct sdesc *chain; + unsigned int size; + + ra = sdesc->bucket.hvalue; + i = (ra >> HIPE_RA_LSR_COUNT) & hipe_sdesc_table.mask; + chain = hipe_sdesc_table.bucket[i]; + + for (; chain != NULL; chain = chain->bucket.next) + if (chain->bucket.hvalue == ra) + return chain; /* collision!
(shouldn't happen) */ + + sdesc->bucket.next = hipe_sdesc_table.bucket[i]; + hipe_sdesc_table.bucket[i] = sdesc; + hipe_sdesc_table.used += 1; + size = 1 << hipe_sdesc_table.log2size; + if (hipe_sdesc_table.used > (4*size)/5) /* rehash at 80% */ + hipe_grow_sdesc_table(); + return sdesc; +} + +/* Create the table with 2^10 buckets and insert the first descriptor. */ +void hipe_init_sdesc_table(struct sdesc *sdesc) +{ + unsigned int log2size, size; + + log2size = 10; + size = 1 << log2size; + hipe_sdesc_table.log2size = log2size; + hipe_sdesc_table.mask = size - 1; + hipe_sdesc_table.used = 0; + hipe_sdesc_table.bucket = alloc_bucket(size); + + hipe_put_sdesc(sdesc); +} + +/* + * XXX: x86 and SPARC currently use the same stack descriptor + * representation. If different representations are needed in + * the future, this code has to be made target dependent. + */ +/* + * Decode a stack descriptor from a 5-tuple term: + * return address, exception handler address (0 if none), + * frame size (at most 65535), arity (at most 255), + * and a tuple of live slot offsets. + * Returns a newly allocated sdesc, or 0 if the term is malformed. + */ +struct sdesc *hipe_decode_sdesc(Eterm arg) +{ + Uint ra, exnra; + Eterm *live; + Uint fsize, arity, nlive, i, nslots, off; + Uint livebitswords, sdescbytes; + void *p; + struct sdesc *sdesc; + + if (is_not_tuple(arg) || + (tuple_val(arg))[0] != make_arityval(5) || + term_to_Uint((tuple_val(arg))[1], &ra) == 0 || + term_to_Uint((tuple_val(arg))[2], &exnra) == 0 || + is_not_small((tuple_val(arg))[3]) || + (fsize = unsigned_val((tuple_val(arg))[3])) > 65535 || + is_not_small((tuple_val(arg))[4]) || + (arity = unsigned_val((tuple_val(arg))[4])) > 255 || + is_not_tuple((tuple_val(arg))[5])) + return 0; + /* Get tuple with live slots */ + live = tuple_val((tuple_val(arg))[5]) + 1; + /* Get number of live slots */ + nlive = arityval(live[-1]); + /* Calculate size of frame = locals + ra + arguments */ + nslots = fsize + 1 + arity; + /* Check that only valid slots are given. */ + for (i = 0; i < nlive; ++i) { + if (is_not_small(live[i]) || + (off = unsigned_val(live[i]), off >= nslots) || + off == fsize) + return 0; + } + + /* Calculate number of words for the live bitmap. */ + livebitswords = (fsize + arity + 1 + 31) / 32; + /* Calculate number of bytes needed for the stack descriptor.
*/ + sdescbytes = + (exnra + ? offsetof(struct sdesc_with_exnra, sdesc.livebits) + : offsetof(struct sdesc, livebits)) + + livebitswords * sizeof(int); + p = erts_alloc(ERTS_ALC_T_HIPE, sdescbytes); + /* If we have an exception handler use the + special sdesc_with_exnra structure. */ + if (exnra) { + struct sdesc_with_exnra *sdesc_we = p; + sdesc_we->exnra = exnra; + sdesc = &(sdesc_we->sdesc); + } else + sdesc = p; + + /* Initialise head of sdesc. */ + sdesc->bucket.next = 0; + sdesc->bucket.hvalue = ra; + sdesc->summary = (fsize << 9) | (exnra ? (1<<8) : 0) | arity; + /* Clear all live-bits */ + for (i = 0; i < livebitswords; ++i) + sdesc->livebits[i] = 0; + /* Set live-bits given by caller. + Shift an unsigned one: livebits[] is unsigned int, and shifting + a signed 1 into bit 31 would be undefined behaviour. */ + for (i = 0; i < nlive; ++i) { + off = unsigned_val(live[i]); + sdesc->livebits[off / 32] |= (1U << (off & 31)); + } + return sdesc; +} diff --git a/erts/emulator/hipe/hipe_stack.h b/erts/emulator/hipe/hipe_stack.h new file mode 100644 index 0000000000..354ac81b4c --- /dev/null +++ b/erts/emulator/hipe/hipe_stack.h @@ -0,0 +1,128 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_STACK_H +#define HIPE_STACK_H + +#include "hipe_arch.h" + +/* + * Stack descriptors.
+ */ + +#include <stddef.h> /* offsetof() */ + +struct sdesc { + struct { + unsigned long hvalue; /* return address */ + struct sdesc *next; /* hash collision chain */ + } bucket; + unsigned int summary; /* frame size, exn handler presence flag, arity */ + unsigned int livebits[1]; /* size depends on arch & data in summary field */ +}; + +/* An sdesc preceded by its exception handler address; see sdesc_exnra(). */ +struct sdesc_with_exnra { + unsigned long exnra; + struct sdesc sdesc; +}; + +/* + * Layout of the summary field, as decoded by the accessors below: + * bits 9 and up: frame size + * bit 8: set if the descriptor is embedded in a struct sdesc_with_exnra + * bits 0-7: arity + */ +static __inline__ unsigned int sdesc_fsize(const struct sdesc *sdesc) +{ + return sdesc->summary >> 9; +} + +static __inline__ unsigned int sdesc_arity(const struct sdesc *sdesc) +{ + return sdesc->summary & 0xFF; +} + +/* Return the exception handler address, or 0 if bit 8 of summary is clear. */ +static __inline__ unsigned long sdesc_exnra(const struct sdesc *sdesc) +{ + if ((sdesc->summary & (1<<8))) { + const char *tmp; + /* step back from the embedded sdesc to the enclosing sdesc_with_exnra */ + tmp = (const char*)sdesc - offsetof(struct sdesc_with_exnra, sdesc); + return ((const struct sdesc_with_exnra*)tmp)->exnra; + } + return 0; +} + +struct hipe_sdesc_table { + unsigned int log2size; + unsigned int mask; /* INV: mask == (1 << log2size)-1 */ + unsigned int used; + struct sdesc **bucket; +}; +extern struct hipe_sdesc_table hipe_sdesc_table; + +extern struct sdesc *hipe_put_sdesc(struct sdesc*); +extern void hipe_init_sdesc_table(struct sdesc*); +extern struct sdesc *hipe_decode_sdesc(Eterm); + +#if !defined(__GNUC__) || (__GNUC__ < 2) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) +#define __builtin_expect(x, expected_value) (x) +#endif +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + +/* + * Look up the stack descriptor for return address ra. + * The key must already be in the table: neither the bucket head + * nor the chain links are checked for NULL. + */ +static __inline__ const struct sdesc *hipe_find_sdesc(unsigned long ra) +{ + unsigned int i = (ra >> HIPE_RA_LSR_COUNT) & hipe_sdesc_table.mask; + const struct sdesc *sdesc = hipe_sdesc_table.bucket[i]; + if (likely(sdesc->bucket.hvalue == ra)) + return sdesc; + do { + sdesc = sdesc->bucket.next; + } while (sdesc->bucket.hvalue != ra); + return sdesc; +} + +AEXTERN(void,nbif_stack_trap_ra,(void)); + +extern void hipe_print_nstack(Process*); +extern void hipe_find_handler(Process*);
+extern void (*hipe_handle_stack_trap(Process*))(void); +extern void hipe_update_stack_trap(Process*, const struct sdesc*); +extern int hipe_fill_stacktrace(Process*, int, Eterm**); + +#if 0 && defined(HIPE_NSTACK_GROWS_UP) +#define hipe_nstack_start(p) ((p)->hipe.nstack) +#define hipe_nstack_used(p) ((p)->hipe.nsp - (p)->hipe.nstack) +#endif +#if defined(HIPE_NSTACK_GROWS_DOWN) +#define hipe_nstack_start(p) ((p)->hipe.nsp) +#define hipe_nstack_used(p) ((p)->hipe.nstend - (p)->hipe.nsp) +#endif + +/* + * GC support procedures + */ +extern Eterm *fullsweep_nstack(Process *p, Eterm *n_htop); +extern void gensweep_nstack(Process *p, Eterm **ptr_old_htop, Eterm **ptr_n_htop); + +#ifdef HYBRID +#ifdef INCREMENTAL +extern Eterm *ma_fullsweep_nstack(Process *p, Eterm *n_htop, Eterm *n_hend); +#else +extern Eterm *ma_fullsweep_nstack(Process *p, Eterm *n_htop); +#endif +extern void ma_gensweep_nstack(Process *p, Eterm **ptr_old_htop, Eterm **ptr_n_htop); +#endif /* HYBRID */ + +#endif /* HIPE_STACK_H */ diff --git a/erts/emulator/hipe/hipe_x86.c b/erts/emulator/hipe/hipe_x86.c new file mode 100644 index 0000000000..f79a2d53f4 --- /dev/null +++ b/erts/emulator/hipe/hipe_x86.c @@ -0,0 +1,272 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2003-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + */ +#include <stddef.h> /* offsetof() */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "global.h" +#include <sys/mman.h> + +#include "hipe_arch.h" +#include "hipe_native_bif.h" /* nbif_callemu() */ + +#undef F_TIMO +#undef THE_NON_VALUE +#undef ERL_FUN_SIZE +#include "hipe_literals.h" + +void hipe_patch_load_fe(Uint32 *address, Uint32 value) +{ + /* address points to a disp32 or imm32 operand */ + *address = value; +} + +int hipe_patch_insn(void *address, Uint32 value, Eterm type) +{ + switch (type) { + case am_closure: + case am_constant: + case am_atom: + case am_c_const: + break; + case am_x86_abs_pcrel: + value += (Uint)address; + break; + default: + return -1; + } + *(Uint32*)address = value; + return 0; +} + +int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline) +{ + Uint rel32; + + if (trampoline) + return -1; + rel32 = (Uint)destAddress - (Uint)callAddress - 4; + *(Uint32*)callAddress = rel32; + hipe_flush_icache_word(callAddress); + return 0; +} + +/* + * Memory allocator for executable code. + * + * This is required on x86 because some combinations + * of Linux kernels and CPU generations default to + * non-executable memory mappings, causing ordinary + * malloc() memory to be non-executable. + */ +static unsigned int code_bytes; +static char *code_next; + +#if 0 /* change to non-zero to get allocation statistics at exit() */ +static unsigned int total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs, nr_large, total_lost; +static unsigned int atexit_done; + +static void alloc_code_stats(void) +{ + printf("\r\nalloc_code_stats: %u bytes mapped, %u joins, %u splits, %u bytes allocated, %u average alloc, %u large allocs, %u bytes lost\r\n", + total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs ? 
total_alloc/nr_allocs : 0, nr_large, total_lost); +} + +static void atexit_alloc_code_stats(void) +{ + if (!atexit_done) { + atexit_done = 1; + (void)atexit(alloc_code_stats); + } +} + +#define ALLOC_CODE_STATS(X) do{X;}while(0) +#else +#define ALLOC_CODE_STATS(X) do{}while(0) +#endif + +/* FreeBSD 6.1 and Darwin breakage */ +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif + +static void morecore(unsigned int alloc_bytes) +{ + unsigned int map_bytes; + char *map_hint, *map_start; + + /* Page-align the amount to allocate. */ + map_bytes = (alloc_bytes + 4095) & ~4095; + + /* Round up small allocations. */ + if (map_bytes < 1024*1024) + map_bytes = 1024*1024; + else + ALLOC_CODE_STATS(++nr_large); + + /* Create a new memory mapping, ensuring it is executable + and in the low 2GB of the address space. Also attempt + to make it adjacent to the previous mapping. */ + map_hint = code_next + code_bytes; + if ((unsigned long)map_hint & 4095) + abort(); + map_start = mmap(map_hint, map_bytes, + PROT_EXEC|PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS +#ifdef __x86_64__ + |MAP_32BIT +#endif + , + -1, 0); + if (map_start == MAP_FAILED) { + perror("mmap"); + abort(); + } + ALLOC_CODE_STATS(total_mapped += map_bytes); + + /* Merge adjacent mappings, so the trailing portion of the previous + mapping isn't lost. In practice this is quite successful. */ + if (map_start == map_hint) { + ALLOC_CODE_STATS(++nr_joins); + code_bytes += map_bytes; + } else { + ALLOC_CODE_STATS(++nr_splits); + ALLOC_CODE_STATS(total_lost += code_bytes); + code_next = map_start; + code_bytes = map_bytes; + } + + ALLOC_CODE_STATS(atexit_alloc_code_stats()); +} + +static void *alloc_code(unsigned int alloc_bytes) +{ + void *res; + + /* Align function entries. 
*/ + alloc_bytes = (alloc_bytes + 3) & ~3; + + if (code_bytes < alloc_bytes) + morecore(alloc_bytes); + ALLOC_CODE_STATS(++nr_allocs); + ALLOC_CODE_STATS(total_alloc += alloc_bytes); + res = code_next; + code_next += alloc_bytes; + code_bytes -= alloc_bytes; + return res; +} + +void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p) +{ + if (is_not_nil(callees)) + return NULL; + *trampolines = NIL; + return alloc_code(nrbytes); +} + +/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2() + and hipe_bif0.c:hipe_make_stub() */ +void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity) +{ + /* + * This creates a native code stub with the following contents: + * + * movl $Address, P_BEAM_IP(%ebp) + * movb $Arity, P_ARITY(%ebp) + * jmp callemu + * + * The stub has variable size, depending on whether the P_BEAM_IP + * and P_ARITY offsets fit in 8-bit signed displacements or not. + * The rel32 offset in the final jmp depends on its actual location, + * which also depends on the size of the previous instructions. + * Arity is stored with a movb because (a) Björn tells me arities + * are <= 255, and (b) a movb is smaller and faster than a movl. + */ + unsigned int codeSize; + unsigned char *code, *codep; + unsigned int callEmuOffset; + + codeSize = /* 16, 19, or 22 bytes */ + 16 + /* 16 when both offsets are 8-bit */ + (P_BEAM_IP >= 128 ? 3 : 0) + + (P_ARITY >= 128 ?
3 : 0); + codep = code = alloc_code(codeSize); + + /* movl $beamAddress, P_BEAM_IP(%ebp); 3 or 6 bytes, plus 4 */ + codep[0] = 0xc7; +#if P_BEAM_IP >= 128 + codep[1] = 0x85; /* disp32[EBP] */ + codep[2] = P_BEAM_IP & 0xFF; + codep[3] = (P_BEAM_IP >> 8) & 0xFF; + codep[4] = (P_BEAM_IP >> 16) & 0xFF; + codep[5] = (P_BEAM_IP >> 24) & 0xFF; + codep += 6; +#else + codep[1] = 0x45; /* disp8[EBP] */ + codep[2] = P_BEAM_IP; + codep += 3; +#endif + codep[0] = ((unsigned int)beamAddress) & 0xFF; + codep[1] = ((unsigned int)beamAddress >> 8) & 0xFF; + codep[2] = ((unsigned int)beamAddress >> 16) & 0xFF; + codep[3] = ((unsigned int)beamAddress >> 24) & 0xFF; + codep += 4; + + /* movb $beamArity, P_ARITY(%ebp); 3 or 6 bytes */ + codep[0] = 0xc6; +#if P_ARITY >= 128 + codep[1] = 0x85; /* disp32[EBP] */ + codep[2] = P_ARITY & 0xFF; + codep[3] = (P_ARITY >> 8) & 0xFF; + codep[4] = (P_ARITY >> 16) & 0xFF; + codep[5] = (P_ARITY >> 24) & 0xFF; + codep += 6; +#else + codep[1] = 0x45; /* disp8[EBP] */ + codep[2] = P_ARITY; + codep += 3; +#endif + codep[0] = beamArity; + codep += 1; + + /* jmp callemu; 5 bytes */ + callEmuOffset = (unsigned char*)nbif_callemu - (code + codeSize); + codep[0] = 0xe9; + codep[1] = callEmuOffset & 0xFF; + codep[2] = (callEmuOffset >> 8) & 0xFF; + codep[3] = (callEmuOffset >> 16) & 0xFF; + codep[4] = (callEmuOffset >> 24) & 0xFF; + codep += 5; + ASSERT(codep == code + codeSize); + + /* I-cache flush? */ + + return code; +} + +void hipe_arch_print_pcb(struct hipe_process_state *p) +{ +#define U(n,x) \ + printf(" % 4d | %s | 0x%08x | |\r\n", (int)offsetof(struct hipe_process_state,x), n, (unsigned)p->x) /* one table row per field: offset, label, value; offsetof() yields size_t, so cast to int to match the "% 4d" conversion */ + U("ncsp ", ncsp); + U("narity ", narity); +#undef U +} diff --git a/erts/emulator/hipe/hipe_x86.h b/erts/emulator/hipe/hipe_x86.h new file mode 100644 index 0000000000..94ca39fc4f --- /dev/null +++ b/erts/emulator/hipe/hipe_x86.h @@ -0,0 +1,58 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2003-2009. All Rights Reserved.
+ * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifndef HIPE_X86_H +#define HIPE_X86_H + +static __inline__ void hipe_flush_icache_word(void *address) +{ + /* Do nothing. This works as long as compiled code is + executed by a single CPU thread. */ +} + +static __inline__ void +hipe_flush_icache_range(void *address, unsigned int nbytes) +{ + /* Do nothing. This works as long as compiled code is + executed by a single CPU thread. */ +} + +/* for stack descriptor hash lookup */ +#define HIPE_RA_LSR_COUNT 0 /* all bits are significant */ + +/* for hipe_bifs_{read,write}_{s,u}32 */ +static __inline__ int hipe_word32_address_ok(void *address) +{ + return 1; +} + +/* Native stack growth direction. */ +#define HIPE_NSTACK_GROWS_DOWN + +#define hipe_arch_name am_x86 + +extern void nbif_inc_stack_0(void); +extern void nbif_handle_fp_exception(void); + +/* for hipe_bifs_enter_code_2 */ +extern void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p); +#define HIPE_ALLOC_CODE(n,c,t,p) hipe_alloc_code((n),(c),(t),(p)) + +#endif /* HIPE_X86_H */ diff --git a/erts/emulator/hipe/hipe_x86.tab b/erts/emulator/hipe/hipe_x86.tab new file mode 100644 index 0000000000..a38fe49156 --- /dev/null +++ b/erts/emulator/hipe/hipe_x86.tab @@ -0,0 +1,24 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2004-2009. All Rights Reserved. 
+# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +# $Id$ +# x86-specific atoms + +atom handle_fp_exception +atom inc_stack_0 +atom x86 diff --git a/erts/emulator/hipe/hipe_x86_abi.txt b/erts/emulator/hipe/hipe_x86_abi.txt new file mode 100644 index 0000000000..62a704eef3 --- /dev/null +++ b/erts/emulator/hipe/hipe_x86_abi.txt @@ -0,0 +1,128 @@ + + %CopyrightBegin% + + Copyright Ericsson AB 2001-2009. All Rights Reserved. + + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + %CopyrightEnd% + +$Id$ + +HiPE x86 ABI +============ +This document describes aspects of HiPE's runtime system +that are specific for the x86 (IA32) architecture. + +Register Usage +-------------- +%esp and %ebp are fixed and must be preserved by calls (callee-save). +%eax, %edx, %ecx, %ebx, %edi are clobbered by calls (caller-save). +%esi is a fixed global register (unallocatable). + +%esp is the native code stack pointer, growing towards lower addresses. 
+%ebp (aka P) is the current process' "Process*". +%esi (aka HP) is the current process' heap pointer. (If HP_IN_ESI is true.) + +The caller-save registers are used as temporary scratch registers +and for parameters in function calls. + +[XXX: Eventually, when we have better register allocation in place, +the current "Process*" may be put in %fs instead, which will make +%ebp available as a general-purpose register.] + +Calling Convention +------------------ +The first NR_ARG_REGS (a tunable parameter between 0 and 5, inclusive) +parameters are passed in %eax, %edx, %ecx, %ebx, and %edi. + +The first return value from a function is placed in %eax, the second +(if any) is placed in %edx. + +The callee returns by using the "ret $N" instruction, which also +deallocates the stacked actual parameters. + +Stack Frame Layout +------------------ +[From top to bottom: formals in left-to-right order, incoming return +address, fixed-size chunk for locals & spills, variable-size area +for actuals, outgoing return address. %esp normally points at the +bottom of the fixed-size chunk, except during a recursive call. +The callee pops the actuals, so no %esp adjustment at return.] + +Stack Descriptors +----------------- +sdesc_fsize() is the frame size excluding the return address word. + +Stacks and Unix Signal Handlers +------------------------------- +Each Erlang process has its own private native code stack. +This stack is managed by the compiler and the runtime system. +It is not guaranteed to have space for a Unix signal handler. +The Unix process MUST employ an "alternate signal stack" using +sigaltstack(), and all user-defined signal handlers MUST be +registered with sigaction() and the SA_ONSTACK flag. Failure +to observe these rules may lead to memory corruption errors. 
+ + +Standard Unix x86 Calling Conventions +===================================== + +%eax, %edx, %ecx are clobbered by calls (caller-save) +%esp, %ebp, %ebx, %esi, %edi are preserved by calls (callee-save) +%eax and %edx receive function call return values +%esp is the stack pointer (fixed) +%ebp is optional frame pointer or local variable +actual parameters are pushed right-to-left +caller deallocates parameters after return (addl $N,%esp) + +Windows 32-bit C Calling Conventions +==================================== + +%esp, %ebp, %ebx, %esi, %edi are preserved by calls (callee-save) +%eax and %edx receive function call return values +Parameters not passed in registers are pushed right-to-left on the stack. + +Windows supports several calling conventions on x86 that differ +in whether caller or callee pops off stacked parameters, whether +any parameters are passed in registers, and how function names +are mangled. + +The __cdecl convention +---------------------- +Default for C and C++ application code. +No parameters are passed in registers. +Caller deallocates parameters after return (addl $N, %esp). +A function name is prefixed by a "_". + +The __stdcall convention +------------------------ +Used for calling Win32 API functions. +No parameters are passed in registers. +Callee deallocates parameters during return (ret $N). +A function name is prefixed by a "_" and suffixed by "@" and the +number of bytes of stack space the parameters use in decimal. +Prototypes are required. Varargs functions are converted to __cdecl. + +The __fastcall convention +------------------------ +The first two parameters are passed in %ecx and %edx. +Callee deallocates stacked parameters during return (ret $N). +A function name is prefixed by a "@" and suffixed by "@" and the +number of bytes of stack space the parameters use in decimal. + +The __thiscall convention +------------------------- +Used for C++ member functions. 
+Similar to __cdecl except for the implicit 'this' parameter +which is passed in %ecx rather than being pushed on the stack. +No name mangling occurs. diff --git a/erts/emulator/hipe/hipe_x86_asm.m4 b/erts/emulator/hipe/hipe_x86_asm.m4 new file mode 100644 index 0000000000..4c1d612ccd --- /dev/null +++ b/erts/emulator/hipe/hipe_x86_asm.m4 @@ -0,0 +1,286 @@ +changecom(`/*', `*/')dnl +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2002-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * $Id$ + */ +`#ifndef HIPE_X86_ASM_H +#define HIPE_X86_ASM_H' + +/* + * Tunables. + */ +define(LEAF_WORDS,24)dnl number of stack words for leaf functions +define(NR_ARG_REGS,3)dnl admissible values are 0 to 5, inclusive +define(HP_IN_ESI,1)dnl change to 0 to not reserve a global register for HP +define(SIMULATE_NSP,0)dnl change to 1 to simulate call/ret insns + +`#define X86_LEAF_WORDS 'LEAF_WORDS +`#define LEAF_WORDS 'LEAF_WORDS + +/* + * Workarounds for Darwin. 
+ */ +ifelse(OPSYS,darwin,`` +/* Darwin */ +#define TEXT .text +#define JOIN(X,Y) X##Y +#define CSYM(NAME) JOIN(_,NAME) +#define ASYM(NAME) CSYM(NAME) +#define GLOBAL(NAME) .globl NAME +#define SET_SIZE(NAME) /*empty*/ +#define TYPE_FUNCTION(NAME) /*empty*/ +'',`` +/* Not Darwin */ +#define TEXT .section ".text" +#define CSYM(NAME) NAME +#define ASYM(NAME) NAME +#define GLOBAL(NAME) .global NAME +#define SET_SIZE(NAME) .size NAME,.-NAME +#define TYPE_FUNCTION(NAME) .type NAME,@function +'')dnl + +/* + * Reserved registers. + */ +`#define P %ebp' + +`#define X86_HP_IN_ESI 'HP_IN_ESI +`#if X86_HP_IN_ESI +#define SAVE_HP movl %esi, P_HP(P) +#define RESTORE_HP movl P_HP(P), %esi +#else +#define SAVE_HP /*empty*/ +#define RESTORE_HP /*empty*/ +#endif' + +`#define NSP %esp +#define SAVE_CSP movl %esp, P_CSP(P) +#define RESTORE_CSP movl P_CSP(P), %esp' + +`#define X86_SIMULATE_NSP 'SIMULATE_NSP + +/* + * Context switching macros. + */ +`#define SWITCH_C_TO_ERLANG_QUICK \ + SAVE_CSP; \ + movl P_NSP(P), NSP' + +`#define SWITCH_ERLANG_TO_C_QUICK \ + movl NSP, P_NSP(P); \ + RESTORE_CSP' + +`#define SAVE_CACHED_STATE \ + SAVE_HP' + +`#define RESTORE_CACHED_STATE \ + RESTORE_HP' + +`#define SWITCH_C_TO_ERLANG \ + RESTORE_CACHED_STATE; \ + SWITCH_C_TO_ERLANG_QUICK' + +`#define SWITCH_ERLANG_TO_C \ + SAVE_CACHED_STATE; \ + SWITCH_ERLANG_TO_C_QUICK' + +/* + * Argument (parameter) registers. + */ +`#define X86_NR_ARG_REGS 'NR_ARG_REGS +`#define NR_ARG_REGS 'NR_ARG_REGS + +ifelse(eval(NR_ARG_REGS >= 1),0,, +``#define ARG0 %eax +'')dnl +ifelse(eval(NR_ARG_REGS >= 2),0,, +``#define ARG1 %edx +'')dnl +ifelse(eval(NR_ARG_REGS >= 3),0,, +``#define ARG2 %ecx +'')dnl +ifelse(eval(NR_ARG_REGS >= 4),0,, +``#define ARG3 %ebx +'')dnl +ifelse(eval(NR_ARG_REGS >= 5),0,, +``#define ARG4 %edi +'')dnl + +/* + * TEMP_RV: + * Used in nbif_stack_trap_ra to preserve the return value. + * Must be a C callee-save register. + * Must be otherwise unused in the return path. 
+ */ +`#define TEMP_RV %ebx' + +/* + * TEMP_NSP: + * Used in BIF wrappers to permit copying stacked parameter from + * the native stack to the C stack. + * Set up by NBIF_COPY_NSP(arity) and used by NBIF_ARG(arity,argno). + * TEMP_NSP may alias the last BIF argument register. + * NBIF_COPY_NSP and NBIF_ARG currently fail if ARITY > NR_ARG_REGS! + */ +`#define TEMP_NSP %edi' + +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +dnl X X +dnl X hipe_x86_glue.S support X +dnl X X +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + +dnl +dnl LOAD_ARG_REGS +dnl +define(LAR_1,`movl P_ARG$1(P), ARG$1 ; ')dnl +define(LAR_N,`ifelse(eval($1 >= 0),0,,`LAR_N(eval($1-1))LAR_1($1)')')dnl +define(LOAD_ARG_REGS,`LAR_N(eval(NR_ARG_REGS-1))')dnl +`#define LOAD_ARG_REGS 'LOAD_ARG_REGS + +dnl +dnl STORE_ARG_REGS +dnl +define(SAR_1,`movl ARG$1, P_ARG$1(P) ; ')dnl +define(SAR_N,`ifelse(eval($1 >= 0),0,,`SAR_N(eval($1-1))SAR_1($1)')')dnl +define(STORE_ARG_REGS,`SAR_N(eval(NR_ARG_REGS-1))')dnl +`#define STORE_ARG_REGS 'STORE_ARG_REGS + +dnl +dnl NSP_CALL(FUN) +dnl Emit a CALL FUN instruction, or simulate it. +dnl FUN must not be an NSP-based memory operand. +dnl +ifelse(eval(SIMULATE_NSP),0, +``#define NSP_CALL(FUN) call FUN'', +``#define NSP_CALL(FUN) subl $4,NSP; movl $1f,(NSP); jmp FUN; 1:'')dnl + +dnl +dnl NSP_RETN(NPOP) +dnl Emit a RET $NPOP instruction, or simulate it. +dnl NPOP should be non-zero. +dnl +ifelse(eval(SIMULATE_NSP),0, +``#define NSP_RETN(NPOP) ret $NPOP'', +``#define NSP_RETN(NPOP) movl (NSP),TEMP_RV; addl $4+NPOP,NSP; jmp *TEMP_RV'')dnl + +dnl +dnl NSP_RET0 +dnl Emit a RET instruction, or simulate it. 
+dnl +ifelse(eval(SIMULATE_NSP),0, +``#define NSP_RET0 ret'', +``#define NSP_RET0 movl (NSP),TEMP_RV; addl $4,NSP; jmp *TEMP_RV'')dnl + +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +dnl X X +dnl X hipe_x86_bifs.m4 support X +dnl X X +dnl XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + +dnl +dnl NBIF_COPY_NSP(ARITY) +dnl if ARITY > NR_ARG_REGS then TEMP_NSP := %esp. +dnl Allows the stacked formals to be referenced via TEMP_NSP after the stack switch. +dnl +define(NBIF_COPY_NSP,`ifelse(eval($1 > NR_ARG_REGS),0,,`movl %esp, TEMP_NSP')')dnl +`/* #define NBIF_COPY_NSP_0 'NBIF_COPY_NSP(0)` */' +`/* #define NBIF_COPY_NSP_1 'NBIF_COPY_NSP(1)` */' +`/* #define NBIF_COPY_NSP_2 'NBIF_COPY_NSP(2)` */' +`/* #define NBIF_COPY_NSP_3 'NBIF_COPY_NSP(3)` */' +`/* #define NBIF_COPY_NSP_5 'NBIF_COPY_NSP(5)` */' + +dnl +dnl BASE_OFFSET(N) +dnl Generates a base-register offset operand for the value N. +dnl When N is zero the offset becomes the empty string, as this +dnl may allow the assembler to choose a more compact encoding. +dnl +define(BASE_OFFSET,`ifelse(eval($1),0,`',`$1')')dnl + +dnl +dnl NBIF_ARG_OPND(ARITY,ARGNO) +dnl Generates an operand for this formal parameter. +dnl It will be a register operand when 0 <= ARGNO < NR_ARG_REGS. +dnl It will be a memory operand via TEMP_NSP when ARGNO >= NR_ARG_REGS.
+dnl +define(NBIF_ARG_OPND,`ifelse(eval($2 >= NR_ARG_REGS),0,`ARG'$2,BASE_OFFSET(eval(($1-NR_ARG_REGS)*4-($2-NR_ARG_REGS)*4))`(TEMP_NSP)')')dnl +`/* #define NBIF_ARG_OPND_1_0 'NBIF_ARG_OPND(1,0)` */' +`/* #define NBIF_ARG_OPND_2_0 'NBIF_ARG_OPND(2,0)` */' +`/* #define NBIF_ARG_OPND_2_1 'NBIF_ARG_OPND(2,1)` */' +`/* #define NBIF_ARG_OPND_3_0 'NBIF_ARG_OPND(3,0)` */' +`/* #define NBIF_ARG_OPND_3_1 'NBIF_ARG_OPND(3,1)` */' +`/* #define NBIF_ARG_OPND_3_2 'NBIF_ARG_OPND(3,2)` */' +`/* #define NBIF_ARG_OPND_5_0 'NBIF_ARG_OPND(5,0)` */' +`/* #define NBIF_ARG_OPND_5_1 'NBIF_ARG_OPND(5,1)` */' +`/* #define NBIF_ARG_OPND_5_2 'NBIF_ARG_OPND(5,2)` */' +`/* #define NBIF_ARG_OPND_5_3 'NBIF_ARG_OPND(5,3)` */' +`/* #define NBIF_ARG_OPND_5_4 'NBIF_ARG_OPND(5,4)` */' + +dnl +dnl NBIF_ARG_REG(CARGNO,REG) +dnl Generates code to move REG to C argument number CARGNO. +dnl +define(NBIF_ARG_REG,`movl $2,BASE_OFFSET(eval(4*$1))(%esp)')dnl +`/* #define NBIF_ARG_REG_0_P 'NBIF_ARG_REG(0,P)` */' + +dnl +dnl NBIF_ARG(CARGNO,ARITY,ARGNO) +dnl Generates code to move Erlang parameter number ARGNO +dnl in a BIF of arity ARITY to C parameter number CARGNO. +dnl +dnl This must be called after NBIF_COPY_NSP(ARITY). +dnl +dnl NBIF_ARG(_,_,ARGNO2) must be called after NBIF_ARG(_,_,ARGNO1) +dnl if ARGNO2 > ARGNO1. (ARG0 may be reused as a temporary register +dnl for Erlang parameters passed on the stack.) +dnl +define(NBIF_ARG_MEM,`movl NBIF_ARG_OPND($2,$3),%eax; NBIF_ARG_REG($1,%eax)')dnl +define(NBIF_ARG,`ifelse(eval($3 >= NR_ARG_REGS),0,`NBIF_ARG_REG($1,`ARG'$3)',`NBIF_ARG_MEM($1,$2,$3)')')dnl + +dnl +dnl NBIF_RET(ARITY) +dnl Generates a return from a native BIF, taking care to pop +dnl any stacked formal parameters. 
+dnl +define(RET_POP,`ifelse(eval($1 > NR_ARG_REGS),0,0,eval(4*($1 - NR_ARG_REGS)))')dnl +define(NBIF_RET_N,`ifelse(eval($1),0,`NSP_RET0',`NSP_RETN($1)')')dnl +define(NBIF_RET,`NBIF_RET_N(eval(RET_POP($1)))')dnl +`/* #define NBIF_RET_0 'NBIF_RET(0)` */' +`/* #define NBIF_RET_1 'NBIF_RET(1)` */' +`/* #define NBIF_RET_2 'NBIF_RET(2)` */' +`/* #define NBIF_RET_3 'NBIF_RET(3)` */' +`/* #define NBIF_RET_5 'NBIF_RET(5)` */' + +dnl +dnl STORE_CALLER_SAVE +dnl LOAD_CALLER_SAVE +dnl Used to save and restore C caller-save argument registers around +dnl calls to hipe_inc_nstack. The first 3 arguments registers are C +dnl caller-save, remaining ones are C callee-save. +dnl +define(NBIF_MIN,`ifelse(eval($1 > $2),0,$1,$2)')dnl +define(NR_CALLER_SAVE,NBIF_MIN(NR_ARG_REGS,3))dnl +define(STORE_CALLER_SAVE,`SAR_N(eval(NR_CALLER_SAVE-1))')dnl +define(LOAD_CALLER_SAVE,`LAR_N(eval(NR_CALLER_SAVE-1))')dnl +`#define STORE_CALLER_SAVE 'STORE_CALLER_SAVE +`#define LOAD_CALLER_SAVE 'LOAD_CALLER_SAVE + +`#endif /* HIPE_X86_ASM_H */' diff --git a/erts/emulator/hipe/hipe_x86_bifs.m4 b/erts/emulator/hipe/hipe_x86_bifs.m4 new file mode 100644 index 0000000000..80be74f7b2 --- /dev/null +++ b/erts/emulator/hipe/hipe_x86_bifs.m4 @@ -0,0 +1,635 @@ +changecom(`/*', `*/')dnl +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* + * $Id$ + */ + +include(`hipe/hipe_x86_asm.m4') +#`include' "hipe_literals.h" + +`#if THE_NON_VALUE == 0 +#define TEST_GOT_EXN testl %eax,%eax +#else +#define TEST_GOT_EXN cmpl $THE_NON_VALUE,%eax +#endif' + +`#define TEST_GOT_MBUF movl P_MBUF(P), %edx; testl %edx, %edx; jnz 3f; 2: +#define JOIN3(A,B,C) A##B##C +#define HANDLE_GOT_MBUF(ARITY) 3: call JOIN3(nbif_,ARITY,_gc_after_bif); jmp 2b' + +/* + * standard_bif_interface_1(nbif_name, cbif_name) + * standard_bif_interface_2(nbif_name, cbif_name) + * standard_bif_interface_3(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 1-3 parameters and + * standard failure mode. + */ +define(standard_bif_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + TEXT + .align 4 + GLOBAL(ASYM($1)) +ASYM($1): + /* copy native stack pointer */ + NBIF_COPY_NSP(1) + + /* switch to C stack */ + SWITCH_ERLANG_TO_C + + /* make the call on the C stack */ + NBIF_ARG_REG(0,P) + NBIF_ARG(1,1,0) + call CSYM($2) + TEST_GOT_MBUF + + /* switch to native stack */ + SWITCH_C_TO_ERLANG + + /* throw exception if failure, otherwise return */ + TEST_GOT_EXN + jz nbif_1_simple_exception + NBIF_RET(1) + HANDLE_GOT_MBUF(1) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(standard_bif_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + TEXT + .align 4 + GLOBAL(ASYM($1)) +ASYM($1): + /* copy native stack pointer */ + NBIF_COPY_NSP(2) + + /* switch to C stack */ + SWITCH_ERLANG_TO_C + + /* make the call on the C stack */ + NBIF_ARG_REG(0,P) + NBIF_ARG(1,2,0) + NBIF_ARG(2,2,1) + call CSYM($2) + TEST_GOT_MBUF + + /* switch to native stack */ + SWITCH_C_TO_ERLANG + + /* throw exception if failure, otherwise return */ + TEST_GOT_EXN + jz nbif_2_simple_exception + NBIF_RET(2) + HANDLE_GOT_MBUF(2) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(standard_bif_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + TEXT + .align 4 + GLOBAL(ASYM($1)) +ASYM($1): + /* copy 
native stack pointer */ + NBIF_COPY_NSP(3) + + /* switch to C stack */ + SWITCH_ERLANG_TO_C + + /* make the call on the C stack */ + NBIF_ARG_REG(0,P) + NBIF_ARG(1,3,0) + NBIF_ARG(2,3,1) + NBIF_ARG(3,3,2) + call CSYM($2) + TEST_GOT_MBUF + + /* switch to native stack */ + SWITCH_C_TO_ERLANG + + /* throw exception if failure, otherwise return */ + TEST_GOT_EXN + jz nbif_3_simple_exception + NBIF_RET(3) + HANDLE_GOT_MBUF(3) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +/* + * fail_bif_interface_0(nbif_name, cbif_name) + * + * Generate native interface for a BIF with 0 parameters and + * standard failure mode. + */ +define(fail_bif_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + TEXT + .align 4 + GLOBAL(ASYM($1)) +ASYM($1): + /* switch to C stack */ + SWITCH_ERLANG_TO_C + + /* make the call on the C stack */ + NBIF_ARG_REG(0,P) + call CSYM($2) + TEST_GOT_MBUF + + /* switch to native stack */ + SWITCH_C_TO_ERLANG + + /* throw exception if failure, otherwise return */ + TEST_GOT_EXN + jz nbif_0_simple_exception + NBIF_RET(0) + HANDLE_GOT_MBUF(0) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +/* + * nofail_primop_interface_0(nbif_name, cbif_name) + * nofail_primop_interface_1(nbif_name, cbif_name) + * nofail_primop_interface_2(nbif_name, cbif_name) + * nofail_primop_interface_3(nbif_name, cbif_name) + * + * Generate native interface for a primop with implicit P + * parameter, 0-3 ordinary parameters and no failure mode. + * Also used for guard BIFs. 
+ */ +define(nofail_primop_interface_0, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + TEXT + .align 4 + GLOBAL(ASYM($1)) +ASYM($1): + /* switch to C stack */ + SWITCH_ERLANG_TO_C + + /* make the call on the C stack */ + NBIF_ARG_REG(0,P) + call CSYM($2) + TEST_GOT_MBUF + + /* switch to native stack */ + SWITCH_C_TO_ERLANG + + /* return */ + NBIF_RET(0) + HANDLE_GOT_MBUF(0) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(nofail_primop_interface_1, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + TEXT + .align 4 + GLOBAL(ASYM($1)) +ASYM($1): + /* copy native stack pointer */ + NBIF_COPY_NSP(1) + + /* switch to C stack */ + SWITCH_ERLANG_TO_C + + /* make the call on the C stack */ + NBIF_ARG_REG(0,P) + NBIF_ARG(1,1,0) + call CSYM($2) + TEST_GOT_MBUF + + /* switch to native stack */ + SWITCH_C_TO_ERLANG + + /* return */ + NBIF_RET(1) + HANDLE_GOT_MBUF(1) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(nofail_primop_interface_2, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + TEXT + .align 4 + GLOBAL(ASYM($1)) +ASYM($1): + /* copy native stack pointer */ + NBIF_COPY_NSP(2) + + /* switch to C stack */ + SWITCH_ERLANG_TO_C + + /* make the call on the C stack */ + NBIF_ARG_REG(0,P) + NBIF_ARG(1,2,0) + NBIF_ARG(2,2,1) + call CSYM($2) + TEST_GOT_MBUF + + /* switch to native stack */ + SWITCH_C_TO_ERLANG + + /* return */ + NBIF_RET(2) + HANDLE_GOT_MBUF(2) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +define(nofail_primop_interface_3, +` +#ifndef HAVE_$1 +#`define' HAVE_$1 + TEXT + .align 4 + GLOBAL(ASYM($1)) +ASYM($1): + /* copy native stack pointer */ + NBIF_COPY_NSP(3) + + /* switch to C stack */ + SWITCH_ERLANG_TO_C + + /* make the call on the C stack */ + NBIF_ARG_REG(0,P) + NBIF_ARG(1,3,0) + NBIF_ARG(2,3,1) + NBIF_ARG(3,3,2) + call CSYM($2) + TEST_GOT_MBUF + + /* switch to native stack */ + SWITCH_C_TO_ERLANG + + /* return */ + NBIF_RET(3) + HANDLE_GOT_MBUF(3) + SET_SIZE(ASYM($1)) + TYPE_FUNCTION(ASYM($1)) +#endif') + +/* + * 
nocons_nofail_primop_interface_0(nbif_name, cbif_name)
+ * nocons_nofail_primop_interface_1(nbif_name, cbif_name)
+ * nocons_nofail_primop_interface_2(nbif_name, cbif_name)
+ * nocons_nofail_primop_interface_3(nbif_name, cbif_name)
+ * nocons_nofail_primop_interface_5(nbif_name, cbif_name)
+ *
+ * Generate native interface for a primop with implicit P
+ * parameter, 0-3 or 5 ordinary parameters, and no failure mode.
+ * The primop cannot CONS or gc.
+ */
+define(nocons_nofail_primop_interface_0,
+`
+#ifndef HAVE_$1
+#`define' HAVE_$1
+    TEXT
+    .align 4
+    GLOBAL(ASYM($1))
+ASYM($1):
+    /* switch to C stack (quick variant of the switch) */
+    SWITCH_ERLANG_TO_C_QUICK
+
+    /* make the call on the C stack; no mbuf test follows the call */
+    NBIF_ARG_REG(0,P)
+    call CSYM($2)
+
+    /* switch to native stack (quick variant of the switch) */
+    SWITCH_C_TO_ERLANG_QUICK
+
+    /* return */
+    NBIF_RET(0)
+    SET_SIZE(ASYM($1))
+    TYPE_FUNCTION(ASYM($1))
+#endif')
+/*
+ * One ordinary parameter. As in the other stubs with parameters, the
+ * native stack pointer is copied before the stack switch.
+ */
+define(nocons_nofail_primop_interface_1,
+`
+#ifndef HAVE_$1
+#`define' HAVE_$1
+    TEXT
+    .align 4
+    GLOBAL(ASYM($1))
+ASYM($1):
+    /* copy native stack pointer */
+    NBIF_COPY_NSP(1)
+
+    /* switch to C stack */
+    SWITCH_ERLANG_TO_C_QUICK
+
+    /* make the call on the C stack; P first, then the parameter */
+    NBIF_ARG_REG(0,P)
+    NBIF_ARG(1,1,0)
+    call CSYM($2)
+
+    /* switch to native stack */
+    SWITCH_C_TO_ERLANG_QUICK
+
+    /* return */
+    NBIF_RET(1)
+    SET_SIZE(ASYM($1))
+    TYPE_FUNCTION(ASYM($1))
+#endif')
+/*
+ * Two ordinary parameters.
+ */
+define(nocons_nofail_primop_interface_2,
+`
+#ifndef HAVE_$1
+#`define' HAVE_$1
+    TEXT
+    .align 4
+    GLOBAL(ASYM($1))
+ASYM($1):
+    /* copy native stack pointer */
+    NBIF_COPY_NSP(2)
+
+    /* switch to C stack */
+    SWITCH_ERLANG_TO_C_QUICK
+
+    /* make the call on the C stack; P first, then both parameters */
+    NBIF_ARG_REG(0,P)
+    NBIF_ARG(1,2,0)
+    NBIF_ARG(2,2,1)
+    call CSYM($2)
+
+    /* switch to native stack */
+    SWITCH_C_TO_ERLANG_QUICK
+
+    /* return */
+    NBIF_RET(2)
+    SET_SIZE(ASYM($1))
+    TYPE_FUNCTION(ASYM($1))
+#endif')
+/*
+ * Three ordinary parameters.
+ */
+define(nocons_nofail_primop_interface_3,
+`
+#ifndef HAVE_$1
+#`define' HAVE_$1
+    TEXT
+    .align 4
+    GLOBAL(ASYM($1))
+ASYM($1):
+    /* copy native stack pointer */
+    NBIF_COPY_NSP(3)
+
+    /* switch to C stack */
+    SWITCH_ERLANG_TO_C_QUICK
+
+    /* make the call on the C stack; P first, then the three parameters */
+    NBIF_ARG_REG(0,P)
+    NBIF_ARG(1,3,0)
+    NBIF_ARG(2,3,1)
+    NBIF_ARG(3,3,2)
+    call CSYM($2)
+
+    /* switch to native stack */
+    SWITCH_C_TO_ERLANG_QUICK
+
+    /* return */
+    NBIF_RET(3)
+    SET_SIZE(ASYM($1))
+    TYPE_FUNCTION(ASYM($1))
+#endif')
+/*
+ * Five ordinary parameters. There is no four-parameter variant in this
+ * family.
+ */
+define(nocons_nofail_primop_interface_5,
+`
+#ifndef HAVE_$1
+#`define' HAVE_$1
+    TEXT
+    .align 4
+    GLOBAL(ASYM($1))
+ASYM($1):
+    /* copy native stack pointer */
+    NBIF_COPY_NSP(5)
+
+    /* switch to C stack */
+    SWITCH_ERLANG_TO_C_QUICK
+
+    /* make the call on the C stack; P first, then the five parameters */
+    NBIF_ARG_REG(0,P)
+    NBIF_ARG(1,5,0)
+    NBIF_ARG(2,5,1)
+    NBIF_ARG(3,5,2)
+    NBIF_ARG(4,5,3)
+    NBIF_ARG(5,5,4)
+    call CSYM($2)
+
+    /* switch to native stack */
+    SWITCH_C_TO_ERLANG_QUICK
+
+    /* return */
+    NBIF_RET(5)
+    SET_SIZE(ASYM($1))
+    TYPE_FUNCTION(ASYM($1))
+#endif')
+
+/*
+ * noproc_primop_interface_0(nbif_name, cbif_name)
+ * noproc_primop_interface_1(nbif_name, cbif_name)
+ * noproc_primop_interface_2(nbif_name, cbif_name)
+ * noproc_primop_interface_3(nbif_name, cbif_name)
+ * noproc_primop_interface_5(nbif_name, cbif_name)
+ *
+ * Generate native interface for a primop with no implicit P
+ * parameter, 0-3 or 5 ordinary parameters, and no failure mode.
+ * The primop cannot CONS or gc.
+ */
+define(noproc_primop_interface_0,
+`
+#ifndef HAVE_$1
+#`define' HAVE_$1
+    TEXT
+    .align 4
+    GLOBAL(ASYM($1))
+ASYM($1):
+    /* switch to C stack (quick variant of the switch) */
+    SWITCH_ERLANG_TO_C_QUICK
+
+    /* make the call on the C stack; no argument is set up at all */
+    call CSYM($2)
+
+    /* switch to native stack (quick variant of the switch) */
+    SWITCH_C_TO_ERLANG_QUICK
+
+    /* return */
+    NBIF_RET(0)
+    SET_SIZE(ASYM($1))
+    TYPE_FUNCTION(ASYM($1))
+#endif')
+/*
+ * One ordinary parameter. Since P is not passed, the argument setup
+ * uses destination index 0 here (it is 1 in the variants with P).
+ */
+define(noproc_primop_interface_1,
+`
+#ifndef HAVE_$1
+#`define' HAVE_$1
+    TEXT
+    .align 4
+    GLOBAL(ASYM($1))
+ASYM($1):
+    /* copy native stack pointer */
+    NBIF_COPY_NSP(1)
+
+    /* switch to C stack */
+    SWITCH_ERLANG_TO_C_QUICK
+
+    /* make the call on the C stack */
+    NBIF_ARG(0,1,0)
+    call CSYM($2)
+
+    /* switch to native stack */
+    SWITCH_C_TO_ERLANG_QUICK
+
+    /* return */
+    NBIF_RET(1)
+    SET_SIZE(ASYM($1))
+    TYPE_FUNCTION(ASYM($1))
+#endif')
+/*
+ * Two ordinary parameters.
+ */
+define(noproc_primop_interface_2,
+`
+#ifndef HAVE_$1
+#`define' HAVE_$1
+    TEXT
+    .align 4
+    GLOBAL(ASYM($1))
+ASYM($1):
+    /* copy native stack pointer */
+    NBIF_COPY_NSP(2)
+
+    /* switch to C stack */
+    SWITCH_ERLANG_TO_C_QUICK
+
+    /* make the call on the C stack */
+    NBIF_ARG(0,2,0)
+    NBIF_ARG(1,2,1)
+    call CSYM($2)
+
+    /* switch to native stack */
+    SWITCH_C_TO_ERLANG_QUICK
+
+    /* return */
+    NBIF_RET(2)
+    SET_SIZE(ASYM($1))
+    TYPE_FUNCTION(ASYM($1))
+#endif')
+/*
+ * Three ordinary parameters.
+ */
+define(noproc_primop_interface_3,
+`
+#ifndef HAVE_$1
+#`define' HAVE_$1
+    TEXT
+    .align 4
+    GLOBAL(ASYM($1))
+ASYM($1):
+    /* copy native stack pointer */
+    NBIF_COPY_NSP(3)
+
+    /* switch to C stack */
+    SWITCH_ERLANG_TO_C_QUICK
+
+    /* make the call on the C stack */
+    NBIF_ARG(0,3,0)
+    NBIF_ARG(1,3,1)
+    NBIF_ARG(2,3,2)
+    call CSYM($2)
+
+    /* switch to native stack */
+    SWITCH_C_TO_ERLANG_QUICK
+
+    /* return */
+    NBIF_RET(3)
+    SET_SIZE(ASYM($1))
+    TYPE_FUNCTION(ASYM($1))
+#endif')
+/*
+ * Five ordinary parameters. There is no four-parameter variant in this
+ * family.
+ */
+define(noproc_primop_interface_5,
+`
+#ifndef HAVE_$1
+#`define' HAVE_$1
+    TEXT
+    .align 4
+    GLOBAL(ASYM($1))
+ASYM($1):
+    /* copy native stack pointer */
+    NBIF_COPY_NSP(5)
+
+    /* switch to C stack */
+    SWITCH_ERLANG_TO_C_QUICK
+
+    /* make the call on the C stack */
+    NBIF_ARG(0,5,0)
+    NBIF_ARG(1,5,1)
+    NBIF_ARG(2,5,2)
+    NBIF_ARG(3,5,3)
+    NBIF_ARG(4,5,4)
+    call CSYM($2)
+
+    /* switch to native stack */
+    SWITCH_C_TO_ERLANG_QUICK
+
+    /* return */
+    NBIF_RET(5)
+    SET_SIZE(ASYM($1))
+    TYPE_FUNCTION(ASYM($1))
+#endif')
+
+/*
+ * x86-specific primops.
+ * The floating-point exception stub is generated with the interface
+ * that passes neither P nor any ordinary parameter.
+ */
+noproc_primop_interface_0(nbif_handle_fp_exception, erts_restore_fpu)
+
+/*
+ * Implement gc_bif_interface_0 as nofail_primop_interface_0.
+ */
+define(gc_bif_interface_0,`nofail_primop_interface_0($1, $2)')
+
+/*
+ * Implement gc_bif_interface_N as standard_bif_interface_N (N=1,2).
+ */
+define(gc_bif_interface_1,`standard_bif_interface_1($1, $2)')
+define(gc_bif_interface_2,`standard_bif_interface_2($1, $2)')
+
+/*
+ * Implement gc_nofail_primop_interface_1 as nofail_primop_interface_1.
+ */
+define(gc_nofail_primop_interface_1,`nofail_primop_interface_1($1, $2)')
+
+include(`hipe/hipe_bif_list.m4')
+
+`#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif'
diff --git a/erts/emulator/hipe/hipe_x86_gc.h b/erts/emulator/hipe/hipe_x86_gc.h
new file mode 100644
index 0000000000..4f17f767df
--- /dev/null
+++ b/erts/emulator/hipe/hipe_x86_gc.h
@@ -0,0 +1,138 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2004-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+/* $Id$
+ * Stack walking helpers for native stack GC procedures.
+ */
+#ifndef HIPE_X86_GC_H
+#define HIPE_X86_GC_H
+
+#include "hipe_x86_asm.h" /* for NR_ARG_REGS */
+
+/* uncomment to simulate & test what the initial PowerPC port will do */
+//#define SKIP_YOUNGEST_FRAME
+/*
+ * Per-walk state. By default it embeds one synthetic stack descriptor
+ * that nstack_walk_init_sdesc() fills in; with SKIP_YOUNGEST_FRAME it
+ * only holds a pointer to a descriptor found by lookup.
+ */
+struct nstack_walk_state {
+#ifdef SKIP_YOUNGEST_FRAME
+    const struct sdesc *sdesc0; /* .sdesc0 must be a pointer rvalue */
+#else
+    struct sdesc sdesc0[1]; /* .sdesc0 must be a pointer rvalue */
+#endif
+};
+/*
+ * Returns non-zero if the native stack should be walked. By default this
+ * is always 1; with SKIP_YOUNGEST_FRAME it is 0 when nsp is null or
+ * equal to nstend.
+ */
+static inline int nstack_walk_init_check(const Process *p)
+{
+#ifdef SKIP_YOUNGEST_FRAME
+    if (!p->hipe.nsp || p->hipe.nsp == p->hipe.nstend)
+        return 0;
+#endif
+    return 1;
+}
+/*
+ * Returns the address where the walk starts: nsp by default. With
+ * SKIP_YOUNGEST_FRAME it is nsp plus one word plus the number of
+ * parameters beyond NR_ARG_REGS (narity - NR_ARG_REGS, clamped at 0).
+ */
+static inline Eterm *nstack_walk_nsp_begin(const Process *p)
+{
+#ifdef SKIP_YOUNGEST_FRAME
+    unsigned int nstkarity = p->hipe.narity - NR_ARG_REGS;
+    if ((int)nstkarity < 0)
+        nstkarity = 0;
+    return p->hipe.nsp + 1 + nstkarity;
+#else
+    return p->hipe.nsp;
+#endif
+}
+/*
+ * Returns the descriptor for the first frame of the walk.
+ * Default: builds it in state->sdesc0[0] with the stacked-parameter
+ * count in the low bits of summary and an empty first livebits word
+ * (NOTE(review): the zero fields at bits 8 and 9 are presumably the
+ * handler flag and frame size; confirm against the sdesc accessors).
+ * SKIP_YOUNGEST_FRAME: looks the descriptor up from the word at nsp[0].
+ */
+static inline const struct sdesc*
+nstack_walk_init_sdesc(const Process *p, struct nstack_walk_state *state)
+{
+#ifdef SKIP_YOUNGEST_FRAME
+    const struct sdesc *sdesc = hipe_find_sdesc(p->hipe.nsp[0]);
+    state->sdesc0 = sdesc;
+    return sdesc;
+#else
+    unsigned int nstkarity = p->hipe.narity - NR_ARG_REGS;
+    if ((int)nstkarity < 0)
+        nstkarity = 0;
+    state->sdesc0[0].summary = (0 << 9) | (0 << 8) | nstkarity;
+    state->sdesc0[0].livebits[0] = 0;
+    /* XXX: this appears to prevent a gcc-4.1.1 bug on x86 */
+    __asm__ __volatile__("" : : "m"(*state) : "memory");
+    return &state->sdesc0[0];
+#endif
+}
+/*
+ * Calls hipe_update_stack_trap(). With SKIP_YOUNGEST_FRAME, p->hipe.nsp
+ * is temporarily replaced by nstack_walk_nsp_begin(p) around the call
+ * and restored afterwards.
+ */
+static inline void nstack_walk_update_trap(Process *p, const struct sdesc *sdesc0)
+{
+#ifdef SKIP_YOUNGEST_FRAME
+    Eterm *nsp = p->hipe.nsp;
+    p->hipe.nsp = nstack_walk_nsp_begin(p);
+    hipe_update_stack_trap(p, sdesc0);
+    p->hipe.nsp = nsp;
+#else
+    hipe_update_stack_trap(p, sdesc0);
+#endif
+}
+/* Returns the address where the walk ends (nstend). */
+static inline Eterm *nstack_walk_nsp_end(const Process *p)
+{
+    return p->hipe.nstend;
+}
+/*
+ * Scans downwards from nsp_end for the word holding the address of
+ * nbif_stack_trap_ra and overwrites it with p->hipe.ngra.
+ * The loop has no other exit, so the trap word must be present.
+ */
+static inline void nstack_walk_kill_trap(Process *p, Eterm *nsp_end)
+{
+    /* remove gray/white boundary trap */
+    for (;;) {
+        --nsp_end;
+        if (nsp_end[0] == (unsigned long)nbif_stack_trap_ra) {
+            nsp_end[0] = (unsigned long)p->hipe.ngra;
+            break;
+        }
+    }
+}
+/* Non-zero when gray is at a higher address than black. */
+static inline int nstack_walk_gray_passed_black(const Eterm *gray, const Eterm *black)
+{
+    return gray > black;
+}
+/* Non-zero when nsp is at or above nsp_end. */
+static inline int nstack_walk_nsp_reached_end(const Eterm *nsp, const Eterm *nsp_end)
+{
+    return nsp >= nsp_end;
+}
+/*
+ * Size in words of the frame described by sdesc: frame size plus one
+ * (presumably the return address slot, see nstack_walk_frame_ra) plus
+ * the stacked parameters.
+ */
+static inline unsigned int nstack_walk_frame_size(const struct sdesc *sdesc)
+{
+    return sdesc_fsize(sdesc) + 1 + sdesc_arity(sdesc);
+}
+/* Address of word i of the frame starting at nsp. */
+static inline Eterm *nstack_walk_frame_index(Eterm *nsp, unsigned int i)
+{
+    return &nsp[i];
+}
+/* The word at index sdesc_fsize(sdesc) of the frame starting at nsp. */
+static inline unsigned long
+nstack_walk_frame_ra(const Eterm *nsp, const struct sdesc *sdesc)
+{
+    return nsp[sdesc_fsize(sdesc)];
+}
+/* Advances nsp by sdesc_size words (towards higher addresses). */
+static inline Eterm *nstack_walk_next_frame(Eterm *nsp, unsigned int sdesc_size)
+{
+    return nsp + sdesc_size;
+}
+
+#endif /* HIPE_X86_GC_H */
diff --git a/erts/emulator/hipe/hipe_x86_glue.S b/erts/emulator/hipe/hipe_x86_glue.S
new file mode 100644
index 0000000000..2f7dff39f5
--- /dev/null
+++ b/erts/emulator/hipe/hipe_x86_glue.S
@@ -0,0 +1,420 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2001-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+/*
+ * $Id$
+ */
+
+#include "hipe_x86_asm.h"
+#include "hipe_literals.h"
+#define ASM
+#include "hipe_mode_switch.h"
+
+/*
+ * Enter Erlang from C.
+ * Create a new frame on the C stack.
+ * Save C callee-save registers in the frame.
+ * Retrieve the process pointer from the C parameters.
+ * SWITCH_C_TO_ERLANG.
+ *
+ * Our C frame includes:
+ * - 4*4 == 16 bytes for saving %edi, %esi, %ebx, and %ebp
+ * - 6*4 == 24 bytes of parameter area for recursive calls
+ *   to C BIFs: actual parameters are moved to it, not pushed
+ * - 8 bytes to pad the frame to a multiple of 16 bytes,
+ *   minus 4 bytes for the return address pushed by the caller.
+ *   OSX requires 16-byte alignment of %esp at calls (for SSE2).
+ *
+ * Resulting layout after the 44-byte frame has been created:
+ *    0..23(%esp)  parameter area (by the sizes above)
+ *   24..27(%esp)  padding (by the sizes above)
+ *   28(%esp) %edi, 32(%esp) %esi, 36(%esp) %ebx, 40(%esp) %ebp
+ *   44(%esp)      return address pushed by the C caller
+ *   48(%esp)      first C parameter, i.e. the process pointer
+ * The exit path that restores these registers must use the same offsets.
+ */
+#define ENTER_FROM_C \
+    /* create stack frame and save C callee-save registers in it */ \
+    subl $44, %esp; \
+    movl %edi, 28(%esp); \
+    movl %esi, 32(%esp); \
+    movl %ebx, 36(%esp); \
+    movl %ebp, 40(%esp); \
+    /* get the process pointer: first C parameter, above frame and RA */ \
+    movl 48(%esp), P; \
+    /* switch to native stack */ \
+    SWITCH_C_TO_ERLANG
+
+    TEXT
+
+/*
+ * int x86_call_to_native(Process *p);
+ * Emulated code recursively calls native code.
+ * The value returned to C is the result token that one of the exit
+ * paths leaves in %eax.
+ */
+    .align 4
+    GLOBAL(CSYM(x86_call_to_native))
+    GLOBAL(ASYM(nbif_return))
+CSYM(x86_call_to_native):
+    ENTER_FROM_C
+    /* get argument registers */
+    LOAD_ARG_REGS
+    /* call the target; its return address is nbif_return just below */
+    NSP_CALL(*P_NCALLEE(P))
+/*
+ * We export this return address so that hipe_mode_switch() can discover
+ * when native code tailcalls emulated code.
+ *
+ * This is where native code returns to emulated code.
+ */
+ASYM(nbif_return):
+    movl %eax, P_ARG0(P) # save retval
+    movl $HIPE_MODE_SWITCH_RES_RETURN, %eax
+/* FALLTHROUGH to .flush_exit
+ *
+ * Return to the calling C function with result token in %eax.
+ *
+ * .nosave_exit saves no state
+ * .flush_exit saves cached P state
+ * .suspend_exit also saves RA
+ */
+.suspend_exit:
+    /* save RA, no-op on x86 */
+.flush_exit:
+    /* flush cached P state */
+    SAVE_CACHED_STATE
+.nosave_exit:
+    /* switch to C stack */
+    SWITCH_ERLANG_TO_C_QUICK
+    /* restore C callee-save registers, drop frame, return;
+       the offsets and the frame size mirror ENTER_FROM_C */
+    movl 28(%esp), %edi
+    movl 32(%esp), %esi # kills HP, if HP_IN_ESI is true
+    movl 36(%esp), %ebx
+    movl 40(%esp), %ebp # kills P
+    addl $44, %esp
+    ret
+
+/*
+ * Native code calls emulated code via a linker-generated
+ * stub (hipe_x86_loader.erl) which should look as follows:
+ *
+ * stub for f/N:
+ *     movl $<f's BEAM code address>, P_BEAM_IP(P)
+ *     movb $<N>, P_ARITY(P)
+ *     jmp nbif_callemu
+ *
+ * XXX: Different stubs for different number of register parameters?
+ *
+ * nbif_callemu itself stores the argument registers and leaves through
+ * .suspend_exit with the CALL result token in %eax.
+ */
+    .align 4
+    GLOBAL(ASYM(nbif_callemu))
+ASYM(nbif_callemu):
+    STORE_ARG_REGS
+    movl $HIPE_MODE_SWITCH_RES_CALL, %eax
+    jmp .suspend_exit
+
+/*
+ * nbif_apply
+ * Same sequence as nbif_callemu, but with the APPLY result token.
+ */
+    .align 4
+    GLOBAL(ASYM(nbif_apply))
+ASYM(nbif_apply):
+    STORE_ARG_REGS
+    movl $HIPE_MODE_SWITCH_RES_APPLY, %eax
+    jmp .suspend_exit
+
+/*
+ * Native code calls an emulated-mode closure via a stub defined below.
+ *
+ * The closure is appended as the last actual parameter, and parameters
+ * beyond the first few passed in registers are pushed onto the stack in
+ * left-to-right order.
+ * Hence, the location of the closure parameter only depends on the number
+ * of parameters in registers, not the total number of parameters.
+ */
+#if X86_NR_ARG_REGS == 5
+    .align 4
+    GLOBAL(ASYM(nbif_ccallemu5))
+ASYM(nbif_ccallemu5):
+    movl ARG4, P_ARG4(P)
+    movl 4(NSP), ARG4
+    /*FALLTHROUGH: ARG4 now holds the word loaded from 4(NSP) */
+#endif
+/*
+ * nbif_ccallemu4: save ARG3 in P, then move the next value down into
+ * ARG3, from ARG4 when more than four registers are in use and from
+ * 4(NSP) otherwise. The stubs below repeat this pattern one register
+ * lower each time, so that the shifted value ends up in ARG0.
+ */
+#if X86_NR_ARG_REGS >= 4
+    .align 4
+    GLOBAL(ASYM(nbif_ccallemu4))
+ASYM(nbif_ccallemu4):
+    movl ARG3, P_ARG3(P)
+#if X86_NR_ARG_REGS > 4
+    movl ARG4, ARG3
+#else
+    movl 4(NSP), ARG3
+#endif
+    /*FALLTHROUGH*/
+#endif
+/* nbif_ccallemu3: save ARG2 in P, refill it from ARG3 or 4(NSP). */
+#if X86_NR_ARG_REGS >= 3
+    .align 4
+    GLOBAL(ASYM(nbif_ccallemu3))
+ASYM(nbif_ccallemu3):
+    movl ARG2, P_ARG2(P)
+#if X86_NR_ARG_REGS > 3
+    movl ARG3, ARG2
+#else
+    movl 4(NSP), ARG2
+#endif
+    /*FALLTHROUGH*/
+#endif
+/* nbif_ccallemu2: save ARG1 in P, refill it from ARG2 or 4(NSP). */
+#if X86_NR_ARG_REGS >= 2
+    .align 4
+    GLOBAL(ASYM(nbif_ccallemu2))
+ASYM(nbif_ccallemu2):
+    movl ARG1, P_ARG1(P)
+#if X86_NR_ARG_REGS > 2
+    movl ARG2, ARG1
+#else
+    movl 4(NSP), ARG1
+#endif
+    /*FALLTHROUGH*/
+#endif
+/* nbif_ccallemu1: save ARG0 in P, refill it from ARG1 or 4(NSP). */
+#if X86_NR_ARG_REGS >= 1
+    .align 4
+    GLOBAL(ASYM(nbif_ccallemu1))
+ASYM(nbif_ccallemu1):
+    movl ARG0, P_ARG0(P)
+#if X86_NR_ARG_REGS > 1
+    movl ARG1, ARG0
+#else
+    movl 4(NSP), ARG0
+#endif
+    /*FALLTHROUGH*/
+#endif
+/*
+ * nbif_ccallemu0: store the closure in P and leave through
+ * .suspend_exit with the CALL_CLOSURE result token. The closure is
+ * read from %eax, which is loaded from 4(NSP) only when no argument
+ * registers are configured (presumably ARG0 is %eax otherwise).
+ */
+    .align 4
+    GLOBAL(ASYM(nbif_ccallemu0))
+ASYM(nbif_ccallemu0):
+    /* We use %eax not ARG0 here because ARG0 is not
+       defined when NR_ARG_REGS == 0. */
+#if X86_NR_ARG_REGS == 0
+    movl 4(NSP), %eax
+#endif
+    movl %eax, P_CLOSURE(P)
+    movl $HIPE_MODE_SWITCH_RES_CALL_CLOSURE, %eax
+    jmp .suspend_exit
+
+/*
+ * This is where native code suspends.
+ */
+    .align 4
+    GLOBAL(ASYM(nbif_suspend_0))
+ASYM(nbif_suspend_0):
+    movl $HIPE_MODE_SWITCH_RES_SUSPEND, %eax
+    jmp .suspend_exit
+
+/*
+ * Suspend from a receive (waiting for a message)
+ * Leaves through .suspend_exit with the WAIT result token.
+ */
+    .align 4
+    GLOBAL(ASYM(nbif_suspend_msg))
+ASYM(nbif_suspend_msg):
+    movl $HIPE_MODE_SWITCH_RES_WAIT, %eax
+    jmp .suspend_exit
+
+/*
+ * Suspend from a receive with a timeout (waiting for a message)
+ *    if (!(p->flags & F_TIMO)) { suspend }
+ *    else { return 0; }
+ * The suspend case leaves with the WAIT_TIMEOUT result token; the
+ * timeout case returns directly to the native caller with 0 in %eax.
+ */
+    .align 4
+    GLOBAL(ASYM(nbif_suspend_msg_timeout))
+ASYM(nbif_suspend_msg_timeout):
+    movl P_FLAGS(P), %eax
+    /* this relies on F_TIMO (1<<2) fitting in a byte */
+    testb $F_TIMO, %al # F_TIMO set?
+    jz .no_timeout # if not set, suspend
+    /* timeout has occurred */
+    xorl %eax, %eax # return 0 to signal timeout
+    NSP_RET0
+.no_timeout:
+    movl $HIPE_MODE_SWITCH_RES_WAIT_TIMEOUT, %eax
+    jmp .suspend_exit
+
+/*
+ * int x86_return_to_native(Process *p);
+ * Emulated code returns to its native code caller.
+ * The return value is taken from P_ARG0(P).
+ */
+    .align 4
+    GLOBAL(CSYM(x86_return_to_native))
+CSYM(x86_return_to_native):
+    ENTER_FROM_C
+    /* get return value */
+    movl P_ARG0(P), %eax
+    /*
+     * Return using the stacked return address.
+     * The parameters were popped at the original native-to-emulated
+     * call (hipe_call_from_native_is_recursive), so a plain ret suffices.
+     */
+    NSP_RET0
+
+/*
+ * int x86_tailcall_to_native(Process *p);
+ * Emulated code tailcalls native code.
+ * Unlike x86_call_to_native this jumps to the callee instead of calling
+ * it, so no new return address is pushed on the native stack here.
+ */
+    .align 4
+    GLOBAL(CSYM(x86_tailcall_to_native))
+CSYM(x86_tailcall_to_native):
+    ENTER_FROM_C
+    /* get argument registers */
+    LOAD_ARG_REGS
+    /* jump to the target label */
+    jmp *P_NCALLEE(P)
+
+/*
+ * int x86_throw_to_native(Process *p);
+ * Emulated code throws an exception to its native code caller.
+ * No argument registers are loaded before the jump.
+ */
+    .align 4
+    GLOBAL(CSYM(x86_throw_to_native))
+CSYM(x86_throw_to_native):
+    ENTER_FROM_C
+    /* invoke the handler */
+    jmp *P_NCALLEE(P) # set by hipe_find_handler()
+
+/*
+ * This is the default exception handler for native code.
+ */
+    .align 4
+    GLOBAL(ASYM(nbif_fail))
+ASYM(nbif_fail):
+    movl $HIPE_MODE_SWITCH_RES_THROW, %eax
+    jmp .flush_exit
+/*
+ * nbif_N_gc_after_bif (N = 0..3): record N in P_NARITY(P), call
+ * erts_gc_after_bif_call() with P and the value held in %eax as its
+ * two C arguments, then reset P_NARITY(P) to 0 and return with the
+ * result of the C call in %eax.
+ * The 16-4 byte adjustment of %esp presumably keeps the C stack
+ * 16-byte aligned given the 4-byte return address; confirm.
+ * NOTE(review): no stack switch is done here, so this code seems to
+ * expect to be entered while already on the C stack; confirm at the
+ * call site (the got-mbuf handler macro is not visible here).
+ */
+    GLOBAL(nbif_0_gc_after_bif)
+    GLOBAL(nbif_1_gc_after_bif)
+    GLOBAL(nbif_2_gc_after_bif)
+    GLOBAL(nbif_3_gc_after_bif)
+    .align 4
+nbif_0_gc_after_bif:
+    xorl %edx, %edx
+    jmp .gc_after_bif
+    .align 4
+nbif_1_gc_after_bif:
+    movl $1, %edx
+    jmp .gc_after_bif
+    .align 4
+nbif_2_gc_after_bif:
+    movl $2, %edx
+    jmp .gc_after_bif
+    .align 4
+nbif_3_gc_after_bif:
+    movl $3, %edx
+    /*FALLTHROUGH*/
+    .align 4
+.gc_after_bif:
+    movl %edx, P_NARITY(P)
+    subl $(16-4), %esp
+    movl P, (%esp)
+    movl %eax, 4(%esp)
+    call CSYM(erts_gc_after_bif_call)
+    addl $(16-4), %esp
+    movl $0, P_NARITY(P)
+    ret
+
+/*
+ * We end up here when a BIF called from native signals an
+ * exceptional condition.
+ * The stack/heap registers were just read from P.
+ * Each nbif_N_simple_exception entry loads the arity N of the failed
+ * BIF into %eax before joining the common code.
+ */
+    GLOBAL(nbif_0_simple_exception)
+    GLOBAL(nbif_1_simple_exception)
+    GLOBAL(nbif_2_simple_exception)
+    GLOBAL(nbif_3_simple_exception)
+    .align 4
+nbif_0_simple_exception:
+    xorl %eax, %eax
+    jmp .nbif_simple_exception
+    .align 4
+nbif_1_simple_exception:
+    movl $1, %eax
+    jmp .nbif_simple_exception
+    .align 4
+nbif_2_simple_exception:
+    movl $2, %eax
+    jmp .nbif_simple_exception
+    .align 4
+nbif_3_simple_exception:
+    movl $3, %eax
+    /*FALLTHROUGH*/
+    .align 4
+.nbif_simple_exception:
+    cmpl $FREASON_TRAP, P_FREASON(P)
+    je .handle_trap
+    /*
+     * Find and invoke catch handler (it must exist).
+     * The stack/heap registers were just read from P.
+     * - %eax should contain the current call's arity
+     */
+    movl %eax, P_NARITY(P)
+    /* find and prepare to invoke the handler */
+    SWITCH_ERLANG_TO_C_QUICK # The cached state is clean and need not be saved.
+    movl P, (%esp)
+    call CSYM(hipe_handle_exception) # Note: hipe_handle_exception() conses
+    SWITCH_C_TO_ERLANG # %esp updated by hipe_find_handler()
+    /* now invoke the handler */
+    jmp *P_NCALLEE(P) # set by hipe_find_handler()
+
+    /*
+     * A BIF failed with freason TRAP:
+     * - the BIF's arity is in %eax
+     * - the native heap/stack/reds registers are saved in P
+     * The arity is recorded in P and control leaves through
+     * .nosave_exit with the TRAP result token.
+     */
+.handle_trap:
+    movl %eax, P_NARITY(P)
+    movl $HIPE_MODE_SWITCH_RES_TRAP, %eax
+    jmp .nosave_exit
+
+/*
+ * nbif_stack_trap_ra: trap return address for maintaining
+ * the gray/white stack boundary
+ * The return value is parked in TEMP_RV around the C call, whose
+ * result is the original return address to resume at.
+ */
+    GLOBAL(ASYM(nbif_stack_trap_ra))
+    .align 4
+ASYM(nbif_stack_trap_ra): # a return address, not a function
+    # This only handles a single return value.
+    # If we have more, we need to save them in the PCB.
+    movl %eax, TEMP_RV # save retval
+    SWITCH_ERLANG_TO_C_QUICK
+    movl P, (%esp)
+    call CSYM(hipe_handle_stack_trap) # must not cons; preserves TEMP_RV
+    movl %eax, %edx # original RA
+    SWITCH_C_TO_ERLANG_QUICK
+    movl TEMP_RV, %eax # restore retval
+    jmp *%edx # resume at original RA
+
+/*
+ * nbif_inc_stack_0
+ * Calls hipe_inc_nstack() with P on the C stack, with the caller-save
+ * registers stored before and reloaded after the call, then returns
+ * to the native caller.
+ */
+    .align 4
+    GLOBAL(ASYM(nbif_inc_stack_0))
+ASYM(nbif_inc_stack_0):
+    SWITCH_ERLANG_TO_C_QUICK
+    STORE_CALLER_SAVE
+    movl P, (%esp)
+    # hipe_inc_nstack reads and writes NSP and NSP_LIMIT,
+    # but does not access HP or FCALLS (or the non-x86 NRA).
+    call CSYM(hipe_inc_nstack)
+    LOAD_CALLER_SAVE
+    SWITCH_C_TO_ERLANG_QUICK
+    NSP_RET0
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/erts/emulator/hipe/hipe_x86_glue.h b/erts/emulator/hipe/hipe_x86_glue.h
new file mode 100644
index 0000000000..4c9c92c52f
--- /dev/null
+++ b/erts/emulator/hipe/hipe_x86_glue.h
@@ -0,0 +1,265 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2001-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License.
You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+/*
+ * $Id$
+ */
+#ifndef HIPE_X86_GLUE_H
+#define HIPE_X86_GLUE_H
+
+#include "hipe_x86_asm.h" /* for NR_ARG_REGS and LEAF_WORDS */
+
+/* Emulated code recursively calls native code.
+   The return address is `nbif_return', which is exported so that
+   tailcalls from native to emulated code can be identified. */
+extern unsigned int x86_call_to_native(Process*);
+extern void nbif_return(void);
+
+/* Native-mode stubs for calling emulated-mode closures.
+   NOTE(review): hipe_x86_glue.S as visible in this change defines
+   nbif_ccallemu0 through nbif_ccallemu5 only; confirm nbif_ccallemu6. */
+extern void nbif_ccallemu0(void);
+extern void nbif_ccallemu1(void);
+extern void nbif_ccallemu2(void);
+extern void nbif_ccallemu3(void);
+extern void nbif_ccallemu4(void);
+extern void nbif_ccallemu5(void);
+extern void nbif_ccallemu6(void);
+
+/* Default exception handler for native code. */
+extern void nbif_fail(void);
+
+/* Emulated code returns to its native code caller. */
+extern unsigned int x86_return_to_native(Process*);
+
+/* Emulated code tailcalls native code. */
+extern unsigned int x86_tailcall_to_native(Process*);
+
+/* Emulated code throws an exception to its native code caller. */
+extern unsigned int x86_throw_to_native(Process*);
+/* Larger of two unsigned values; used below to size native stack checks. */
+static __inline__ unsigned int max(unsigned int x, unsigned int y)
+{
+    return (x > y) ? x : y;
+}
+/* Registers a static stack descriptor keyed on the address of
+   nbif_return, with nbif_fail as its exception return address.
+   NOTE(review): the summary value (1<<8) presumably sets the
+   has-handler flag with zero frame size and arity; confirm against
+   the sdesc accessors. */
+static __inline__ void hipe_arch_glue_init(void)
+{
+    static struct sdesc_with_exnra nbif_return_sdesc = {
+        .exnra = (unsigned long)nbif_fail,
+        .sdesc = {
+            .bucket = { .hvalue = (unsigned long)nbif_return },
+            .summary = (1<<8),
+        },
+    };
+    hipe_init_sdesc_table(&nbif_return_sdesc.sdesc);
+}
+
+/* PRE: arity <= NR_ARG_REGS
+   Copies reg[0..arity-1] into p->def_arg_reg[]; compiles to nothing
+   when NR_ARG_REGS is 0. */
+static __inline__ void
+hipe_write_x86_regs(Process *p, unsigned int arity, Eterm reg[])
+{
+#if NR_ARG_REGS > 0
+    int i;
+    for (i = arity; --i >= 0;)
+        p->def_arg_reg[i] = reg[i];
+#endif
+}
+
+/* PRE: arity <= NR_ARG_REGS
+   Copies p->def_arg_reg[0..arity-1] into reg[]; compiles to nothing
+   when NR_ARG_REGS is 0. */
+static __inline__ void
+hipe_read_x86_regs(Process *p, unsigned int arity, Eterm reg[])
+{
+#if NR_ARG_REGS > 0
+    int i;
+    for (i = arity; --i >= 0;)
+        reg[i] = p->def_arg_reg[i];
+#endif
+}
+/* Pushes reg[NR_ARG_REGS..arity-1] onto the native stack in increasing
+   index order (the stack grows downwards, so the last parameter ends
+   up at the lowest address), updates p->hipe.nsp, and writes the
+   remaining parameters to the argument register area. */
+static __inline__ void
+hipe_push_x86_params(Process *p, unsigned int arity, Eterm reg[])
+{
+    unsigned int i;
+
+    i = arity;
+    if (i > NR_ARG_REGS) {
+        Eterm *nsp = p->hipe.nsp;
+        i = NR_ARG_REGS;
+        do {
+            *--nsp = reg[i++];
+        } while (i < arity);
+        p->hipe.nsp = nsp;
+        i = NR_ARG_REGS;
+    }
+    /* INV: i <= NR_ARG_REGS */
+    hipe_write_x86_regs(p, i, reg);
+}
+/* Inverse of hipe_push_x86_params(): pops the stacked parameters into
+   reg[arity-1] down to reg[NR_ARG_REGS], updates p->hipe.nsp, and
+   reads the remaining parameters from the argument register area. */
+static __inline__ void
+hipe_pop_x86_params(Process *p, unsigned int arity, Eterm reg[])
+{
+    unsigned int i;
+
+    i = arity;
+    if (i > NR_ARG_REGS) {
+        Eterm *nsp = p->hipe.nsp;
+        do {
+            reg[--i] = *nsp++;
+        } while (i > NR_ARG_REGS);
+        p->hipe.nsp = nsp;
+        /* INV: i == NR_ARG_REGS */
+    }
+    /* INV: i <= NR_ARG_REGS */
+    hipe_read_x86_regs(p, i, reg);
+}
+
+/* BEAM recursively calls native code. */
+static __inline__ unsigned int
+hipe_call_to_native(Process *p, unsigned int arity, Eterm reg[])
+{
+    int nstkargs;
+
+    /* Note that call_to_native() needs two words on the stack:
+       one for the nbif_return return address, and one for the
+       callee's return address should it need to call inc_stack_0. */
+    if ((nstkargs = arity - NR_ARG_REGS) < 0)
+        nstkargs = 0;
+    hipe_check_nstack(p, max(nstkargs+1+1, LEAF_WORDS));
+    hipe_push_x86_params(p, arity, reg); /* needs nstkargs words */
+    return x86_call_to_native(p); /* needs 1+1 words */
+}
+
+/* Native called BEAM, which now tailcalls native. */
+static __inline__ unsigned int
+hipe_tailcall_to_native(Process *p, unsigned int arity, Eterm reg[])
+{
+    int nstkargs;
+
+    if ((nstkargs = arity - NR_ARG_REGS) < 0)
+        nstkargs = 0;
+    /* +1 so callee can call inc_stack_0 */
+    hipe_check_nstack(p, max(nstkargs+1, LEAF_WORDS));
+    if (nstkargs) { /* lift the return address off the stack, push the
+                       parameters beneath it, then put it back on top */
+        Eterm nra;
+        nra = *(p->hipe.nsp++);
+        hipe_push_x86_params(p, arity, reg);
+        *--(p->hipe.nsp) = nra;
+    } else
+        hipe_write_x86_regs(p, arity, reg);
+    return x86_tailcall_to_native(p);
+}
+
+/* BEAM called native, which has returned. Clean up.
+   (Nothing to do on x86.) */
+static __inline__ void hipe_return_from_native(Process *p) { }
+
+/* BEAM called native, which has thrown an exception. Clean up.
+   (Nothing to do on x86.) */
+static __inline__ void hipe_throw_from_native(Process *p) { }
+
+/* BEAM called native, which now calls BEAM.
+   Move the parameters to reg[].
+   Return zero if this is a tailcall, non-zero if the call is recursive.
+   If tailcall, also clean up native stub continuation. */
+static __inline__ int
+hipe_call_from_native_is_recursive(Process *p, Eterm reg[])
+{
+    Eterm nra;
+
+    nra = *(p->hipe.nsp++);
+    hipe_pop_x86_params(p, p->arity, reg);
+    if (nra != (Eterm)nbif_return) { /* some other native return address:
+                                        push it back and report recursion */
+        *--(p->hipe.nsp) = nra;
+        return 1;
+    }
+    return 0;
+}
+
+/* Native makes a call which needs to unload the parameters.
+   This differs from hipe_call_from_native_is_recursive() in
+   that it doesn't check for or pop the BEAM-calls-native frame.
+   It's currently only used in the implementation of apply.
*/
+static __inline__ void
+hipe_pop_params(Process *p, unsigned int arity, Eterm reg[])
+{
+    if (arity > NR_ARG_REGS) {
+        /* for apply/3 this will only happen if we configure
+           the runtime system with fewer argument registers
+           than default (i.e., 3).
+           The return address on top of the native stack is lifted
+           off while the parameters are popped, then pushed back. */
+        Eterm nra = *(p->hipe.nsp++);
+        hipe_pop_x86_params(p, arity, reg);
+        *--(p->hipe.nsp) = nra;
+    } else {
+        /* arity <= NR_ARG_REGS so we optimise and
+           use hipe_read_x86_regs() directly */
+        hipe_read_x86_regs(p, arity, reg);
+    }
+}
+
+/* Native called BEAM, which now returns back to native. */
+static __inline__ unsigned int hipe_return_to_native(Process *p)
+{
+    return x86_return_to_native(p);
+}
+
+/* Native called BEAM, which now throws an exception back to native. */
+static __inline__ unsigned int hipe_throw_to_native(Process *p)
+{
+    return x86_throw_to_native(p);
+}
+
+/* Return the address of a stub switching a native closure call to BEAM.
+   Arities below NR_ARG_REGS get their own stub; every arity at or above
+   NR_ARG_REGS shares the stub numbered NR_ARG_REGS (the default case of
+   the switch below).
+   NOTE(review): the NR_ARG_REGS == 6 branch returns nbif_ccallemu6, but
+   hipe_x86_glue.S as visible in this change defines stubs only up to
+   nbif_ccallemu5; confirm before configuring six argument registers. */
+static __inline__ void *hipe_closure_stub_address(unsigned int arity)
+{
+#if NR_ARG_REGS == 0
+    return nbif_ccallemu0;
+#else /* > 0 */
+    switch (arity) {
+      case 0: return nbif_ccallemu0;
+#if NR_ARG_REGS == 1
+      default: return nbif_ccallemu1;
+#else /* > 1 */
+      case 1: return nbif_ccallemu1;
+#if NR_ARG_REGS == 2
+      default: return nbif_ccallemu2;
+#else /* > 2 */
+      case 2: return nbif_ccallemu2;
+#if NR_ARG_REGS == 3
+      default: return nbif_ccallemu3;
+#else /* > 3 */
+      case 3: return nbif_ccallemu3;
+#if NR_ARG_REGS == 4
+      default: return nbif_ccallemu4;
+#else /* > 4 */
+      case 4: return nbif_ccallemu4;
+#if NR_ARG_REGS == 5
+      default: return nbif_ccallemu5;
+#else /* > 5 */
+      case 5: return nbif_ccallemu5;
+#if NR_ARG_REGS == 6
+      default: return nbif_ccallemu6;
+#else
+#error "NR_ARG_REGS > 6 NOT YET IMPLEMENTED"
+#endif /* > 6 */
+#endif /* > 5 */
+#endif /* > 4 */
+#endif /* > 3 */
+#endif /* > 2 */
+#endif /* > 1 */
+    }
+#endif /* > 0 */
+}
+
+#endif /* HIPE_X86_GLUE_H */
diff --git a/erts/emulator/hipe/hipe_x86_primops.h
b/erts/emulator/hipe/hipe_x86_primops.h new file mode 100644 index 0000000000..757da484ad --- /dev/null +++ b/erts/emulator/hipe/hipe_x86_primops.h @@ -0,0 +1,22 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +PRIMOP_LIST(am_inc_stack_0, &nbif_inc_stack_0) +PRIMOP_LIST(am_handle_fp_exception, &nbif_handle_fp_exception) diff --git a/erts/emulator/hipe/hipe_x86_signal.c b/erts/emulator/hipe/hipe_x86_signal.c new file mode 100644 index 0000000000..a4fff4ce31 --- /dev/null +++ b/erts/emulator/hipe/hipe_x86_signal.c @@ -0,0 +1,355 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ +/* $Id$ + * hipe_x86_signal.c + * + * Erlang code compiled to x86 native code uses the x86 %esp as its + * stack pointer. This improves performance in several ways: + * - It permits the use of the x86 call and ret instructions, which + * reduces code volume and improves branch prediction. + * - It avoids stealing a gp register to act as a stack pointer. + * + * Unix signal handlers are by default delivered onto the current + * stack, i.e. %esp. This is a problem since our native-code stacks + * are small and may not have room for the Unix signal handler. + * + * There is a way to redirect signal handlers to an "alternate" signal + * stack by using the SA_ONSTACK flag with the sigaction() library call. + * Unfortunately, this has to be specified explicitly for each signal, + * and it is difficult to enforce given the presence of libraries. + * + * Our solution is to override the C library's signal handler setup + * procedure with our own which enforces the SA_ONSTACK flag. + * + * XXX: This code only supports Linux with glibc-2.1 or above, + * and Solaris 8. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#ifdef ERTS_SMP +#include "sys.h" +#include "erl_alloc.h" +#endif +#include "hipe_signal.h" + +#if __GLIBC__ == 2 && (__GLIBC_MINOR__ >= 3) +/* See comment below for glibc 2.2. 
*/ +#ifndef __USE_GNU +#define __USE_GNU /* to un-hide RTLD_NEXT */ +#endif +#include <dlfcn.h> +static int (*__next_sigaction)(int, const struct sigaction*, struct sigaction*); +#define init_done() (__next_sigaction != 0) +extern int __sigaction(int, const struct sigaction*, struct sigaction*); +#define __SIGACTION __sigaction +static void do_init(void) +{ + __next_sigaction = dlsym(RTLD_NEXT, "__sigaction"); + if (__next_sigaction != 0) + return; + perror("dlsym"); + abort(); +} +#define INIT() do { if (!init_done()) do_init(); } while (0) +#endif /* glibc 2.3 */ + +#if __GLIBC__ == 2 && (__GLIBC_MINOR__ == 2 /*|| __GLIBC_MINOR__ == 3*/) +/* + * __libc_sigaction() is the core routine. + * Without libpthread, sigaction() and __sigaction() are both aliases + * for __libc_sigaction(). + * libpthread redefines __sigaction() as a non-trivial wrapper around + * __libc_sigaction(), and makes sigaction() an alias for __sigaction(). + * glibc has internal calls to both sigaction() and __sigaction(). + * + * Overriding __libc_sigaction() would be ideal, but doing so breaks + * libpthread (threads hang). + * + * Overriding __sigaction(), using dlsym RTLD_NEXT to find glibc's + * version of __sigaction(), works with glibc-2.2.4 and 2.2.5. + * Unfortunately, this solution doesn't work with earlier versions, + * including glibc-2.2.2 and glibc-2.1.92 (2.2 despite its name): + * 2.2.2 SIGSEGVs in dlsym RTLD_NEXT (known glibc bug), and 2.1.92 + * SIGSEGVs inexplicably in two test cases in the HiPE test suite. + * + * Instead we only override sigaction() and call __sigaction() + * directly. This should work for HiPE/x86 as long as only the Posix + * signal interface is used, i.e. there are no calls to simulated + * old BSD or SysV interfaces. + * glibc's internal calls to __sigaction() appear to be mostly safe. + * hipe_signal_init() fixes some unsafe ones, e.g. the SIGPROF handler. 
+ * + * Tested with glibc-2.1.92 on RedHat 7.0, glibc-2.2.2 on RedHat 7.1, + * glibc-2.2.4 on RedHat 7.2, and glibc-2.2.5 on RedHat 7.3. + */ +#if 0 +/* works with 2.2.5 and 2.2.4, but not 2.2.2 or 2.1.92 */ +#define __USE_GNU /* to un-hide RTLD_NEXT */ +#include <dlfcn.h> +static int (*__next_sigaction)(int, const struct sigaction*, struct sigaction*); +#define init_done() (__next_sigaction != 0) +#define __SIGACTION __sigaction +static void do_init(void) +{ + __next_sigaction = dlsym(RTLD_NEXT, "__sigaction"); + if (__next_sigaction != 0) + return; + perror("dlsym"); + abort(); +} +#define INIT() do { if (!init_done()) do_init(); } while (0) +#else +/* semi-works with all 2.2 versions so far */ +extern int __sigaction(int, const struct sigaction*, struct sigaction*); +#define __next_sigaction __sigaction /* pthreads-aware version */ +#undef __SIGACTION /* we can't override __sigaction() */ +#define INIT() do{}while(0) +#endif +#endif /* glibc 2.2 */ + +#if __GLIBC__ == 2 && __GLIBC_MINOR__ == 1 +/* + * __sigaction() is the core routine. + * Without libpthread, sigaction() is an alias for __sigaction(). + * libpthread redefines sigaction() as a non-trivial wrapper around + * __sigaction(). + * glibc has internal calls to both sigaction() and __sigaction(). + * + * Overriding __sigaction() would be ideal, but doing so breaks + * libpthread (threads hang). Instead we override sigaction() and + * use dlsym RTLD_NEXT to find glibc's version of sigaction(). + * glibc's internal calls to __sigaction() appear to be mostly safe. + * hipe_signal_init() fixes some unsafe ones, e.g. the SIGPROF handler. + * + * Tested with glibc-2.1.3 on RedHat 6.2. 
+ */ +#include <dlfcn.h> +static int (*__next_sigaction)(int, const struct sigaction*, struct sigaction*); +#define init_done() (__next_sigaction != 0) +#undef __SIGACTION +static void do_init(void) +{ + __next_sigaction = dlsym(RTLD_NEXT, "sigaction"); + if (__next_sigaction != 0) + return; + perror("dlsym"); + abort(); +} +#define INIT() do { if (!init_done()) do_init(); } while (0) +#endif /* glibc 2.1 */ + +/* Is there no standard identifier for Darwin/MacOSX ? */ +#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__) +#define __DARWIN__ 1 +#endif + +#if defined(__DARWIN__) +/* + * Assumes Mac OS X >= 10.3 (dlsym operations not available in 10.2 and + * earlier). + * + * The code below assumes that is is part of the main image (earlier + * in the load order than libSystem and certainly before any dylib + * that might use sigaction) -- a standard RTLD_NEXT caveat. + * + * _sigaction lives in /usr/lib/libSystem.B.dylib and can be found + * with the standard dlsym(RTLD_NEXT) call. The proviso on Mac OS X + * being that the symbol for dlsym doesn't include a leading '_'. + * + * The other _sigaction, _sigaction_no_bind I don't understand the purpose + * of and don't modify. + */ +#include <dlfcn.h> +static int (*__next_sigaction)(int, const struct sigaction*, struct sigaction*); +#define init_done() (__next_sigaction != 0) +#define __SIGACTION _sigaction +static void do_init(void) +{ + __next_sigaction = dlsym(RTLD_NEXT, "sigaction"); + if (__next_sigaction != 0) + return; + perror("dlsym_darwin"); + abort(); +} +#define _NSIG NSIG +#define INIT() do { if (!init_done()) do_init(); } while (0) +#endif /* __DARWIN__ */ + +#if !defined(__GLIBC__) && !defined(__DARWIN__) +/* + * Assume Solaris/x86 2.8. + * There is a number of sigaction() procedures in libc: + * * sigaction(): weak reference to _sigaction(). + * * _sigaction(): apparently a simple wrapper around __sigaction(). + * * __sigaction(): apparently the procedure doing the actual system call. 
+ * * _libc_sigaction(): apparently some thread-related wrapper, which ends + * up calling __sigaction(). + * The threads library redefines sigaction() and _sigaction() to its + * own wrapper, which checks for and restricts access to threads-related + * signals. The wrapper appears to eventually call libc's __sigaction(). + * + * We catch and override _sigaction() since overriding __sigaction() + * causes fatal errors in some cases. + * + * When linked with thread support, there are calls to sigaction() before + * our init routine has had a chance to find _sigaction()'s address. + * This forces us to initialise at the first call. + */ +#include <dlfcn.h> +static int (*__next_sigaction)(int, const struct sigaction*, struct sigaction*); +#define init_done() (__next_sigaction != 0) +#define __SIGACTION _sigaction +static void do_init(void) +{ + __next_sigaction = dlsym(RTLD_NEXT, "_sigaction"); + if (__next_sigaction != 0) + return; + perror("dlsym"); + abort(); +} +#define _NSIG NSIG +#define INIT() do { if (!init_done()) do_init(); } while (0) +#endif /* not glibc or darwin */ + +/* + * This is our wrapper for sigaction(). sigaction() can be called before + * hipe_signal_init() has been executed, especially when threads support + * has been linked with the executable. Therefore, we must initialise + * __next_sigaction() dynamically, the first time it's needed. + */ +static int my_sigaction(int signum, const struct sigaction *act, struct sigaction *oldact) +{ + struct sigaction newact; + + INIT(); + + if (act && + act->sa_handler != SIG_DFL && + act->sa_handler != SIG_IGN && + !(act->sa_flags & SA_ONSTACK)) { + newact = *act; + newact.sa_flags |= SA_ONSTACK; + act = &newact; + } + return __next_sigaction(signum, act, oldact); +} + +/* + * This overrides the C library's core sigaction() procedure, catching + * all its internal calls. 
+ */ +#ifdef __SIGACTION +int __SIGACTION(int signum, const struct sigaction *act, struct sigaction *oldact) +{ + return my_sigaction(signum, act, oldact); +} +#endif + +/* + * This catches the application's own sigaction() calls. + */ +#if !defined(__DARWIN__) +int sigaction(int signum, const struct sigaction *act, struct sigaction *oldact) +{ + return my_sigaction(signum, act, oldact); +} +#endif + +/* + * Set alternate signal stack for the invoking thread. + */ +static void hipe_sigaltstack(void *ss_sp) +{ + stack_t ss; + + ss.ss_sp = ss_sp; + ss.ss_flags = SS_ONSTACK; + ss.ss_size = SIGSTKSZ; + if (sigaltstack(&ss, NULL) < 0) { + /* might be a broken pre-2.4 Linux kernel, try harder */ + ss.ss_flags = 0; + if (sigaltstack(&ss, NULL) < 0) { + perror("sigaltstack"); + abort(); + } + } +} + +#ifdef ERTS_SMP +/* + * Set up alternate signal stack for an Erlang process scheduler thread. + */ +void hipe_thread_signal_init(void) +{ + hipe_sigaltstack(erts_alloc(ERTS_ALC_T_HIPE, SIGSTKSZ)); +} +#endif + +/* + * Set up alternate signal stack for the main thread, + * unless this is a multithreaded runtime system. + */ +static void hipe_sigaltstack_init(void) +{ +#if !defined(ERTS_SMP) + static unsigned long my_sigstack[SIGSTKSZ/sizeof(long)]; + hipe_sigaltstack(my_sigstack); +#endif +} + +/* + * 1. Set up alternate signal stack for the main thread. + * 2. Add SA_ONSTACK to existing user-defined signal handlers. + */ +void hipe_signal_init(void) +{ + struct sigaction sa; + int i; + + INIT(); + + hipe_sigaltstack_init(); + + for (i = 1; i < _NSIG; ++i) { + if (sigaction(i, NULL, &sa)) { + /* This will fail with EINVAL on Solaris if 'i' is one of the + thread library's private signals. We DO catch the initial + setup of these signals, so things MAY be OK anyway. 
*/ + continue; + } + if (sa.sa_handler == SIG_DFL || + sa.sa_handler == SIG_IGN || + (sa.sa_flags & SA_ONSTACK)) + continue; + sa.sa_flags |= SA_ONSTACK; + if (sigaction(i, &sa, NULL)) { +#ifdef SIGCANCEL + /* Solaris 9 x86 refuses to let us modify SIGCANCEL. */ + if (i == SIGCANCEL) + continue; +#endif + perror("sigaction"); + abort(); + } + } +} diff --git a/erts/emulator/hipe/hipe_x86_stack.c b/erts/emulator/hipe/hipe_x86_stack.c new file mode 100644 index 0000000000..b459593883 --- /dev/null +++ b/erts/emulator/hipe/hipe_x86_stack.c @@ -0,0 +1,296 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* $Id$ + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "global.h" +#include "bif.h" +#include "hipe_stack.h" +#ifdef __x86_64__ +#include "hipe_amd64_asm.h" /* for NR_ARG_REGS */ +#else +#include "hipe_x86_asm.h" /* for NR_ARG_REGS */ +#endif + +extern void nbif_fail(void); +extern void nbif_stack_trap_ra(void); + +/* + * hipe_print_nstack() is called from hipe_bifs:show_nstack/1. 
+ */ +static void print_slot(Eterm *sp, unsigned int live) +{ + Eterm val = *sp; + printf(" | 0x%0*lx | 0x%0*lx | ", + 2*(int)sizeof(long), (unsigned long)sp, + 2*(int)sizeof(long), val); + if (live) + erts_printf("%.30T", val); + printf("\r\n"); +} + +void hipe_print_nstack(Process *p) +{ + Eterm *nsp; + Eterm *nsp_end; + struct sdesc sdesc0; + const struct sdesc *sdesc1; + const struct sdesc *sdesc; + unsigned long ra; + unsigned long exnra; + unsigned int mask; + unsigned int sdesc_size; + unsigned int i; + unsigned int nstkarity; + static const char dashes[2*sizeof(long)+5] = { + [0 ... 2*sizeof(long)+3] = '-' + }; + + nsp = p->hipe.nsp; + nsp_end = p->hipe.nstend; + + nstkarity = p->hipe.narity - NR_ARG_REGS; + if ((int)nstkarity < 0) + nstkarity = 0; + sdesc0.summary = nstkarity; + sdesc0.livebits[0] = ~1; + sdesc = &sdesc0; + + printf(" | NATIVE STACK |\r\n"); + printf(" |%s|%s|\r\n", dashes, dashes); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "heap", + 2*(int)sizeof(long), (unsigned long)p->heap); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "high_water", + 2*(int)sizeof(long), (unsigned long)p->high_water); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "hend", + 2*(int)sizeof(long), (unsigned long)p->htop); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "old_heap", + 2*(int)sizeof(long), (unsigned long)p->old_heap); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "old_hend", + 2*(int)sizeof(long), (unsigned long)p->old_hend); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "nsp", + 2*(int)sizeof(long), (unsigned long)p->hipe.nsp); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "nstend", + 2*(int)sizeof(long), (unsigned long)p->hipe.nstend); + printf(" | %*s| 0x%0*lx |\r\n", + 2+2*(int)sizeof(long)+1, "nstblacklim", + 2*(int)sizeof(long), (unsigned long)p->hipe.nstblacklim); + printf(" | %*s | 0x%0*lx |\r\n", + 2+2*(int)sizeof(long), "nstgraylim", + 
2*(int)sizeof(long), (unsigned long)p->hipe.nstgraylim); + printf(" | %*s | 0x%0*x |\r\n", + 2+2*(int)sizeof(long), "narity", + 2*(int)sizeof(long), p->hipe.narity); + printf(" |%s|%s|\r\n", dashes, dashes); + printf(" | %*s | %*s |\r\n", + 2+2*(int)sizeof(long), "Address", + 2+2*(int)sizeof(long), "Contents"); + + for (;;) { + printf(" |%s|%s|\r\n", dashes, dashes); + if (nsp >= nsp_end) { + if (nsp == nsp_end) + return; + fprintf(stderr, "%s: passed end of stack\r\n", __FUNCTION__); + break; + } + ra = nsp[sdesc_fsize(sdesc)]; + if (ra == (unsigned long)nbif_stack_trap_ra) + sdesc1 = hipe_find_sdesc((unsigned long)p->hipe.ngra); + else + sdesc1 = hipe_find_sdesc(ra); + sdesc_size = sdesc_fsize(sdesc) + 1 + sdesc_arity(sdesc); + i = 0; + mask = sdesc->livebits[0]; + for (;;) { + if (i == sdesc_fsize(sdesc)) { + printf(" | 0x%0*lx | 0x%0*lx | ", + 2*(int)sizeof(long), (unsigned long)&nsp[i], + 2*(int)sizeof(long), ra); + if (ra == (unsigned long)nbif_stack_trap_ra) + printf("STACK TRAP, ORIG RA 0x%lx", (unsigned long)p->hipe.ngra); + else + printf("NATIVE RA"); + if ((exnra = sdesc_exnra(sdesc1)) != 0) + printf(", EXNRA 0x%lx", exnra); + printf("\r\n"); + } else + print_slot(&nsp[i], (mask & 1)); + if (++i >= sdesc_size) + break; + if (i & 31) + mask >>= 1; + else + mask = sdesc->livebits[i >> 5]; + } + nsp += sdesc_size; + sdesc = sdesc1; + } + abort(); +} + +#define MINSTACK 128 +#define NSKIPFRAMES 4 + +void hipe_update_stack_trap(Process *p, const struct sdesc *sdesc) +{ + Eterm *nsp; + Eterm *nsp_end; + unsigned long ra; + int n; + + nsp = p->hipe.nsp; + nsp_end = p->hipe.nstend; + if ((unsigned long)((char*)nsp_end - (char*)nsp) < MINSTACK*sizeof(Eterm*)) { + p->hipe.nstgraylim = NULL; + return; + } + n = NSKIPFRAMES; + for (;;) { + nsp += sdesc_fsize(sdesc); + if (nsp >= nsp_end) { + p->hipe.nstgraylim = NULL; + return; + } + ra = nsp[0]; + if (--n <= 0) + break; + nsp += 1 + sdesc_arity(sdesc); + sdesc = hipe_find_sdesc(ra); + } + p->hipe.nstgraylim = nsp + 
1 + sdesc_arity(sdesc); + p->hipe.ngra = (void(*)(void))ra; + nsp[0] = (unsigned long)nbif_stack_trap_ra; +} + +/* + * hipe_handle_stack_trap() is called when the mutator returns to + * nbif_stack_trap_ra, which marks the gray/white stack boundary frame. + * The gray/white boundary is moved back one or more frames. + * + * The function head below is "interesting". + */ +void (*hipe_handle_stack_trap(Process *p))(void) +{ + void (*ngra)(void) = p->hipe.ngra; + const struct sdesc *sdesc = hipe_find_sdesc((unsigned long)ngra); + hipe_update_stack_trap(p, sdesc); + return ngra; +} + +/* + * hipe_find_handler() is called from hipe_handle_exception() to locate + * the current exception handler's PC and SP. + * The native stack MUST contain a stack frame as it appears on + * entry to a function (return address, actuals, caller's frame). + * p->hipe.narity MUST contain the arity (number of actuals). + * On exit, p->hipe.ncallee is set to the handler's PC and p->hipe.nsp + * is set to its SP (low address of its stack frame). 
+ */ +void hipe_find_handler(Process *p) +{ + Eterm *nsp; + Eterm *nsp_end; + unsigned long ra; + unsigned long exnra; + unsigned int arity; + const struct sdesc *sdesc; + unsigned int nstkarity; + + nsp = p->hipe.nsp; + nsp_end = p->hipe.nstend; + nstkarity = p->hipe.narity - NR_ARG_REGS; + if ((int)nstkarity < 0) + nstkarity = 0; + arity = nstkarity; + + while (nsp < nsp_end) { + ra = nsp[0]; + if (ra == (unsigned long)nbif_stack_trap_ra) + ra = (unsigned long)p->hipe.ngra; + sdesc = hipe_find_sdesc(ra); + /* nsp = nsp + 1 + arity + sdesc_fsize(sdesc); */ + nsp += 1; /* skip ra */ + nsp += arity; /* skip actuals */ + if ((exnra = sdesc_exnra(sdesc)) != 0 && + (p->catches >= 0 || + exnra == (unsigned long)nbif_fail)) { + p->hipe.ncallee = (void(*)(void)) exnra; + p->hipe.nsp = nsp; + p->hipe.narity = 0; + /* update the gray/white boundary if we threw past it */ + if (p->hipe.nstgraylim && nsp >= p->hipe.nstgraylim) + hipe_update_stack_trap(p, sdesc); + return; + } + nsp += sdesc_fsize(sdesc); + arity = sdesc_arity(sdesc); + } + fprintf(stderr, "%s: no native CATCH found!\r\n", __FUNCTION__); + abort(); +} + +int hipe_fill_stacktrace(Process *p, int depth, Eterm **trace) +{ + Eterm *nsp; + Eterm *nsp_end; + unsigned long ra, prev_ra; + unsigned int arity; + const struct sdesc *sdesc; + unsigned int nstkarity; + int i; + + if (depth < 1) + return 0; + + nsp = p->hipe.nsp; + nsp_end = p->hipe.nstend; + nstkarity = p->hipe.narity - NR_ARG_REGS; + if ((int)nstkarity < 0) + nstkarity = 0; + arity = nstkarity; + + prev_ra = 0; + i = 0; + while (nsp < nsp_end) { /* INV: i < depth */ + ra = nsp[0]; + if (ra == (unsigned long)nbif_stack_trap_ra) + ra = (unsigned long)p->hipe.ngra; + if (ra != prev_ra) { + trace[i] = (Eterm*)ra; + ++i; + if (i == depth) + break; + prev_ra = ra; + } + sdesc = hipe_find_sdesc(ra); + nsp += 1 + arity + sdesc_fsize(sdesc); + arity = sdesc_arity(sdesc); + } + return i; +} |