diff options
Diffstat (limited to 'erts/include/internal/i386/ethr_dw_atomic.h')
-rw-r--r-- | erts/include/internal/i386/ethr_dw_atomic.h | 278 |
1 files changed, 278 insertions, 0 deletions
diff --git a/erts/include/internal/i386/ethr_dw_atomic.h b/erts/include/internal/i386/ethr_dw_atomic.h new file mode 100644 index 0000000000..9fb89bbe43 --- /dev/null +++ b/erts/include/internal/i386/ethr_dw_atomic.h @@ -0,0 +1,278 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2011. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * Description: Native double word atomics for x86/x86_64 + * Author: Rickard Green + */ + +#ifndef ETHR_X86_DW_ATOMIC_H__ +#define ETHR_X86_DW_ATOMIC_H__ + +#ifdef ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT + +#define ETHR_HAVE_NATIVE_DW_ATOMIC +#define ETHR_NATIVE_DW_ATOMIC_IMPL "ethread" + +/* + * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it will be used + * at runtime in order to determine if native or fallback implementation + * should be used. + */ +#define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \ + ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__ + +#if ETHR_SIZEOF_PTR == 4 +typedef volatile ethr_sint64_t * ethr_native_dw_ptr_t; +# define ETHR_DW_NATMC_ALIGN_MASK__ 0x7 +# define ETHR_DW_CMPXCHG_SFX__ "8b" +# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint64_t +#else +#ifdef ETHR_HAVE_INT128_T +# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint128_t +typedef volatile ethr_sint128_t * ethr_native_dw_ptr_t; +#else +typedef struct { + ethr_sint64_t sint64[2]; +} ethr_native_sint128_t__; +typedef volatile ethr_native_sint128_t__ * ethr_native_dw_ptr_t; +#endif +# define ETHR_DW_NATMC_ALIGN_MASK__ 0xf +# define ETHR_DW_CMPXCHG_SFX__ "16b" +#endif + +/* + * We need 16 byte aligned memory in 64-bit mode, and 8 byte aligned + * memory in 32-bit mode. 16 byte aligned malloc in 64-bit mode is + * not common, and at least some glibc malloc implementations + * only 4 byte align in 32-bit mode. + * + * This code assumes 8 byte aligned memory in 64-bit mode, and 4 byte + * aligned memory in 32-bit mode. A malloc implementation that does + * not adhere to these alignment requirements is seriously broken, + * and we wont bother trying to work around it. + * + * Since memory alignment may be off by one word we need to align at + * runtime. We, therefore, need an extra word allocated. + */ +#define ETHR_DW_NATMC_MEM__(VAR) \ + (&var->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__]) +typedef union { +#ifdef ETHR_NATIVE_SU_DW_SINT_T + volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint; +#endif + volatile ethr_sint_t sint[3]; + volatile char c[ETHR_SIZEOF_PTR*3]; +} ethr_native_dw_atomic_t; + + +#if (defined(ETHR_TRY_INLINE_FUNCS) \ + || defined(ETHR_ATOMIC_IMPL__) \ + || defined(ETHR_X86_SSE2_ASM_C__)) \ + && ETHR_SIZEOF_PTR == 4 \ + && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT) +ethr_sint64_t +ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var); +void +ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var, + ethr_sint64_t val); +#endif + +#if (defined(ETHR_TRY_INLINE_FUNCS) \ + || defined(ETHR_ATOMIC_IMPL__) \ + || defined(ETHR_X86_SSE2_ASM_C__)) +# ifdef ETHR_DEBUG +# define ETHR_DW_DBG_ALIGNED__(PTR) \ + ETHR_ASSERT((((ethr_uint_t) (PTR)) & ETHR_DW_NATMC_ALIGN_MASK__) == 0); +# else +# define ETHR_DW_DBG_ALIGNED__(PTR) +# endif +#endif + +#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__) + +#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR +static ETHR_INLINE ethr_sint_t * +ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var) +{ + return (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var); +} + +#if ETHR_SIZEOF_PTR == 4 && defined(__PIC__) && __PIC__ +/* + * When position independent code is used in 32-bit mode, the EBX register + * is used for storage of global offset table address, and we may not + * use it as input or output in an asm. We need to save and restore the + * EBX register explicitly (for some reason gcc doesn't provide this + * service to us). + */ +# define ETHR_NO_CLOBBER_EBX__ 1 +#else +# define ETHR_NO_CLOBBER_EBX__ 0 +#endif + +#if ETHR_NO_CLOBBER_EBX__ && !defined(ETHR_CMPXCHG8B_REGISTER_SHORTAGE) +/* When no optimization is on, we'll run into a register shortage */ +# if defined(ETHR_DEBUG) || defined(DEBUG) || defined(VALGRIND) \ + || defined(GCOV) || defined(PURIFY) || defined(PURECOV) +# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 1 +# else +# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 0 +# endif +#endif + + +#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_CMPXCHG_MB + +static ETHR_INLINE int +ethr_native_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var, + ethr_sint_t *new, + ethr_sint_t *xchg) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + char xchgd; + + ETHR_DW_DBG_ALIGNED__(p); + + __asm__ __volatile__( +#if ETHR_NO_CLOBBER_EBX__ + "pushl %%ebx\n\t" +# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE + "movl (%7), %%ebx\n\t" + "movl 4(%7), %%ecx\n\t" +# else + "movl %8, %%ebx\n\t" +# endif +#endif + "lock; cmpxchg" ETHR_DW_CMPXCHG_SFX__ " %0\n\t" + "setz %3\n\t" +#if ETHR_NO_CLOBBER_EBX__ + "popl %%ebx\n\t" +#endif + : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd) + : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), +#if ETHR_NO_CLOBBER_EBX__ +# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE + "3"(new) +# else + "3"(new[1]), + "r"(new[0]) +# endif +#else + "3"(new[1]), + "b"(new[0]) +#endif + : "cc", "memory"); + + return (int) xchgd; +} + +#undef ETHR_NO_CLOBBER_EBX__ + +#if ETHR_SIZEOF_PTR == 4 && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT) + +typedef union { + ethr_sint64_t sint64; + ethr_sint_t sint[2]; +} ethr_dw_atomic_no_sse2_convert_t; + +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ + +static ETHR_INLINE ethr_sint64_t +ethr_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var) +{ + if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__) + return ethr_sse2_native_su_dw_atomic_read(var); + else { + ethr_sint_t new[2]; + ethr_dw_atomic_no_sse2_convert_t xchg; + new[0] = new[1] = xchg.sint[0] = xchg.sint[1] = 0x83838383; + (void) ethr_native_dw_atomic_cmpxchg_mb(var, new, xchg.sint); + return xchg.sint64; + } +} + +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET + +static ETHR_INLINE void +ethr_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var, + ethr_sint64_t val) +{ + if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__) + ethr_sse2_native_su_dw_atomic_set(var, val); + else { + ethr_sint_t xchg[2] = {0, 0}; + ethr_dw_atomic_no_sse2_convert_t new; + new.sint64 = val; + while (!ethr_native_dw_atomic_cmpxchg_mb(var, new.sint, xchg)); + } +} + +#endif /* ETHR_SIZEOF_PTR == 4 */ + +#endif /* ETHR_TRY_INLINE_FUNCS */ + +#if defined(ETHR_X86_SSE2_ASM_C__) \ + && ETHR_SIZEOF_PTR == 4 \ + && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT) + +/* + * 8-byte aligned loads and stores of 64-bit values are atomic from + * pentium and forward. An ordinary volatile load or store in 32-bit + * mode generates two 32-bit operations (at least with gcc-4.1.2 using + * -msse2). In order to guarantee one 64-bit load/store operation + * from/to memory we load/store via an xmm register using movq. + * + * Load/store can be achieved using cmpxchg8b, however, using movq is + * much faster. Unfortunately we cannot do the same thing in 64-bit + * mode; instead, we have to do loads and stores via cmpxchg16b. + * + * We do not inline these, but instead compile these into a separate + * object file using -msse2. This since we don't want to use -msse2 for + * the whole system. If we detect sse2 support (pentium4 and forward) + * at runtime, we use them; otherwise, we fall back to using cmpxchg8b + * for loads and stores. This way the binary can be moved between + * processors with and without sse2 support. + */ + +ethr_sint64_t +ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + ethr_sint64_t val; + ETHR_DW_DBG_ALIGNED__(p); + __asm__ __volatile__("movq %1, %0\n\t" : "=x"(val) : "m"(*p) : "memory"); + return val; +} + +void +ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var, + ethr_sint64_t val) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + ETHR_DW_DBG_ALIGNED__(p); + __asm__ __volatile__("movq %1, %0\n\t" : "=m"(*p) : "x"(val) : "memory"); +} + +#endif /* ETHR_X86_SSE2_ASM_C__ */ + +#endif /* ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT */ + +#endif /* ETHR_X86_DW_ATOMIC_H__ */ + |