author     Rickard Green <[email protected]>   2011-07-08 13:56:10 +0200
committer  Rickard Green <[email protected]>   2011-07-08 13:56:10 +0200
commit     c6bc815813f57fb7dfffe704c31a4124a0fe755e (patch)
tree       6d142c893ee4e9f800b094bba26537147cf5b2de /erts/include/internal/i386
parent     5c62fdc1914000f3da921c82a82b6dc30783db53 (diff)
parent     0204e80cba378dfc1140a7f98d96705d470bddde (diff)
Merge branch 'rickard/atomics-api/OTP-9014' into major
* rickard/atomics-api/OTP-9014:
Use new atomic API in runtime system
Improve ethread atomics
Diffstat (limited to 'erts/include/internal/i386')
-rw-r--r--  erts/include/internal/i386/atomic.h          | 334
-rw-r--r--  erts/include/internal/i386/ethr_dw_atomic.h  | 278
-rw-r--r--  erts/include/internal/i386/ethr_membar.h     | 114
-rw-r--r--  erts/include/internal/i386/ethread.h         |   8
-rw-r--r--  erts/include/internal/i386/rwlock.h          |   5
-rw-r--r--  erts/include/internal/i386/spinlock.h        |  27
6 files changed, 567 insertions, 199 deletions
diff --git a/erts/include/internal/i386/atomic.h b/erts/include/internal/i386/atomic.h index 4e402f261a..fc1b619935 100644 --- a/erts/include/internal/i386/atomic.h +++ b/erts/include/internal/i386/atomic.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2005-2010. All Rights Reserved. + * Copyright Ericsson AB 2005-2011. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -25,53 +25,42 @@ */ #undef ETHR_INCLUDE_ATOMIC_IMPL__ -#if !defined(ETHR_X86_ATOMIC32_H__) && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__) -#define ETHR_X86_ATOMIC32_H__ -#define ETHR_INCLUDE_ATOMIC_IMPL__ 4 -#undef ETHR_ATOMIC_WANT_32BIT_IMPL__ -#elif !defined(ETHR_X86_ATOMIC64_H__) && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__) -#define ETHR_X86_ATOMIC64_H__ -#define ETHR_INCLUDE_ATOMIC_IMPL__ 8 -#undef ETHR_ATOMIC_WANT_64BIT_IMPL__ +#if !defined(ETHR_X86_ATOMIC32_H__) \ + && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__) +# define ETHR_X86_ATOMIC32_H__ +# define ETHR_INCLUDE_ATOMIC_IMPL__ 4 +# undef ETHR_ATOMIC_WANT_32BIT_IMPL__ +#elif !defined(ETHR_X86_ATOMIC64_H__) \ + && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__) +# define ETHR_X86_ATOMIC64_H__ +# define ETHR_INCLUDE_ATOMIC_IMPL__ 8 +# undef ETHR_ATOMIC_WANT_64BIT_IMPL__ #endif #ifdef ETHR_INCLUDE_ATOMIC_IMPL__ -#ifndef ETHR_X86_ATOMIC_COMMON__ -#define ETHR_X86_ATOMIC_COMMON__ - -#define ETHR_ATOMIC_HAVE_INC_DEC_INSTRUCTIONS 1 - -#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT) -#define ETHR_MEMORY_BARRIER __asm__ __volatile__("mfence" : : : "memory") -#define ETHR_WRITE_MEMORY_BARRIER __asm__ __volatile__("sfence" : : : "memory") -#define ETHR_READ_MEMORY_BARRIER __asm__ __volatile__("lfence" : : : "memory") -#define ETHR_READ_DEPEND_MEMORY_BARRIER __asm__ __volatile__("" : : : "memory") -#else -#define ETHR_MEMORY_BARRIER \ -do { \ - volatile ethr_sint32_t x___ = 0; \ - __asm__ __volatile__("lock; incl %0" : "=m"(x___) : "m"(x___) : "memory"); \ -} while (0) -#endif - -#endif /* ETHR_X86_ATOMIC_COMMON__ */ - -#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 -#define ETHR_HAVE_NATIVE_ATOMIC32 1 -#define ETHR_NATMC_FUNC__(X) ethr_native_atomic32_ ## X -#define ETHR_ATMC_T__ ethr_native_atomic32_t -#define ETHR_AINT_T__ ethr_sint32_t -#define ETHR_AINT_SUFFIX__ "l" -#elif ETHR_INCLUDE_ATOMIC_IMPL__ == 8 -#define ETHR_HAVE_NATIVE_ATOMIC64 1 -#define ETHR_NATMC_FUNC__(X) ethr_native_atomic64_ ## X -#define ETHR_ATMC_T__ ethr_native_atomic64_t -#define ETHR_AINT_T__ ethr_sint64_t -#define ETHR_AINT_SUFFIX__ "q" -#else -#error "Unsupported integer size" -#endif +# ifndef ETHR_X86_ATOMIC_COMMON__ +# define ETHR_X86_ATOMIC_COMMON__ +# define ETHR_ATOMIC_HAVE_INC_DEC_INSTRUCTIONS 1 +# endif /* ETHR_X86_ATOMIC_COMMON__ */ + +# if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_NATIVE_ATOMIC32 1 +# define ETHR_NATIVE_ATOMIC32_IMPL "ethread" +# define ETHR_NATMC_FUNC__(X) ethr_native_atomic32_ ## X +# define ETHR_ATMC_T__ ethr_native_atomic32_t +# define ETHR_AINT_T__ ethr_sint32_t +# define ETHR_AINT_SUFFIX__ "l" +# elif ETHR_INCLUDE_ATOMIC_IMPL__ == 8 +# define ETHR_HAVE_NATIVE_ATOMIC64 1 +# define ETHR_NATIVE_ATOMIC64_IMPL "ethread" +# define ETHR_NATMC_FUNC__(X) ethr_native_atomic64_ ## X +# define ETHR_ATMC_T__ ethr_native_atomic64_t +# define ETHR_AINT_T__ ethr_sint64_t +# define ETHR_AINT_SUFFIX__ "q" +# else +# error "Unsupported integer size" +# endif /* An atomic is an aligned ETHR_AINT_T__ accessed via locked operations. 
*/ @@ -81,87 +70,28 @@ typedef struct { #if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__) +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADDR 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADDR 1 +#endif + static ETHR_INLINE ETHR_AINT_T__ * ETHR_NATMC_FUNC__(addr)(ETHR_ATMC_T__ *var) { return (ETHR_AINT_T__ *) &var->counter; } -static ETHR_INLINE void -ETHR_NATMC_FUNC__(init)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i) -{ - var->counter = i; -} - -static ETHR_INLINE void -ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i) -{ - var->counter = i; -} - -static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var) -{ - return var->counter; -} - -static ETHR_INLINE void -ETHR_NATMC_FUNC__(add)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr) -{ - __asm__ __volatile__( - "lock; add" ETHR_AINT_SUFFIX__ " %1, %0" - : "=m"(var->counter) - : "ir"(incr), "m"(var->counter)); -} - -static ETHR_INLINE void -ETHR_NATMC_FUNC__(inc)(ETHR_ATMC_T__ *var) -{ - __asm__ __volatile__( - "lock; inc" ETHR_AINT_SUFFIX__ " %0" - : "=m"(var->counter) - : "m"(var->counter)); -} - -static ETHR_INLINE void -ETHR_NATMC_FUNC__(dec)(ETHR_ATMC_T__ *var) -{ - __asm__ __volatile__( - "lock; dec" ETHR_AINT_SUFFIX__ " %0" - : "=m"(var->counter) - : "m"(var->counter)); -} - -static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(add_return)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr) -{ - ETHR_AINT_T__ tmp; - - tmp = incr; - __asm__ __volatile__( - "lock; xadd" ETHR_AINT_SUFFIX__ " %0, %1" /* xadd didn't exist prior to the 486 */ - : "=r"(tmp) - : "m"(var->counter), "0"(tmp)); - /* now tmp is the atomic's previous value */ - return tmp + incr; -} - -static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(inc_return)(ETHR_ATMC_T__ *var) -{ - return ETHR_NATMC_FUNC__(add_return)(var, (ETHR_AINT_T__) 1); -} - -static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(dec_return)(ETHR_ATMC_T__ *var) -{ - return ETHR_NATMC_FUNC__(add_return)(var, (ETHR_AINT_T__) -1); -} +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG_MB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_CMPXCHG_MB 1 +#endif static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(cmpxchg)(ETHR_ATMC_T__ *var, - ETHR_AINT_T__ new, - ETHR_AINT_T__ old) +ETHR_NATMC_FUNC__(cmpxchg_mb)(ETHR_ATMC_T__ *var, + ETHR_AINT_T__ new, + ETHR_AINT_T__ old) { __asm__ __volatile__( "lock; cmpxchg" ETHR_AINT_SUFFIX__ " %2, %3" @@ -171,110 +101,148 @@ ETHR_NATMC_FUNC__(cmpxchg)(ETHR_ATMC_T__ *var, return old; } -static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(and_retold)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) -{ - ETHR_AINT_T__ tmp, old; - - tmp = var->counter; - do { - old = tmp; - tmp = ETHR_NATMC_FUNC__(cmpxchg)(var, tmp & mask, tmp); - } while (__builtin_expect(tmp != old, 0)); - /* now tmp is the atomic's previous value */ - return tmp; -} - -static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(or_retold)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) -{ - ETHR_AINT_T__ tmp, old; - - tmp = var->counter; - do { - old = tmp; - tmp = ETHR_NATMC_FUNC__(cmpxchg)(var, tmp | mask, tmp); - } while (__builtin_expect(tmp != old, 0)); - /* now tmp is the atomic's previous value */ - return tmp; -} +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_XCHG_MB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_XCHG_MB 1 +#endif static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(xchg)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ val) +ETHR_NATMC_FUNC__(xchg_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ val) { 
ETHR_AINT_T__ tmp = val; __asm__ __volatile__( "xchg" ETHR_AINT_SUFFIX__ " %0, %1" : "=r"(tmp) - : "m"(var->counter), "0"(tmp)); + : "m"(var->counter), "0"(tmp) + : "memory"); /* now tmp is the atomic's previous value */ return tmp; } -/* - * Atomic ops with at least specified barriers. - */ +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET 1 +#endif -static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(read_acqb)(ETHR_ATMC_T__ *var) +static ETHR_INLINE void +ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i) { - ETHR_AINT_T__ val; -#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT) - val = var->counter; + var->counter = i; +} + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_RELB 1 #else - val = ETHR_NATMC_FUNC__(add_return)(var, 0); +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_RELB 1 #endif - __asm__ __volatile__("" : : : "memory"); - return val; -} static ETHR_INLINE void ETHR_NATMC_FUNC__(set_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i) { - __asm__ __volatile__("" : : : "memory"); -#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT) - var->counter = i; +#if defined(_M_IX86) + if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) + (void) ETHR_NATMC_FUNC__(xchg_mb)(var, i); + else +#endif /* _M_IX86 */ + { + ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore); + var->counter = i; + } +} + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_MB 1 #else - (void) ETHR_NATMC_FUNC__(xchg)(var, i); +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_MB 1 #endif + +static ETHR_INLINE void +ETHR_NATMC_FUNC__(set_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i) +{ + (void) ETHR_NATMC_FUNC__(xchg_mb)(var, i); } +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_READ 1 +#endif + static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(inc_return_acqb)(ETHR_ATMC_T__ *var) +ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var) { - ETHR_AINT_T__ res = ETHR_NATMC_FUNC__(inc_return)(var); - __asm__ __volatile__("" : : : "memory"); - return res; + return var->counter; } +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_MB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_MB 1 +#endif + static ETHR_INLINE void -ETHR_NATMC_FUNC__(dec_relb)(ETHR_ATMC_T__ *var) +ETHR_NATMC_FUNC__(add_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr) { - __asm__ __volatile__("" : : : "memory"); - ETHR_NATMC_FUNC__(dec)(var); -} + __asm__ __volatile__( + "lock; add" ETHR_AINT_SUFFIX__ " %1, %0" + : "=m"(var->counter) + : "ir"(incr), "m"(var->counter) + : "memory"); +} -static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(dec_return_relb)(ETHR_ATMC_T__ *var) +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_INC_MB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_INC_MB 1 +#endif + +static ETHR_INLINE void +ETHR_NATMC_FUNC__(inc_mb)(ETHR_ATMC_T__ *var) { - __asm__ __volatile__("" : : : "memory"); - return ETHR_NATMC_FUNC__(dec_return)(var); + __asm__ __volatile__( + "lock; inc" ETHR_AINT_SUFFIX__ " %0" + : "=m"(var->counter) + : "m"(var->counter) + : "memory"); } -static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(cmpxchg_acqb)(ETHR_ATMC_T__ *var, - ETHR_AINT_T__ new, - ETHR_AINT_T__ old) +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_DEC_MB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_DEC_MB 1 +#endif + +static ETHR_INLINE void 
+ETHR_NATMC_FUNC__(dec_mb)(ETHR_ATMC_T__ *var) { - return ETHR_NATMC_FUNC__(cmpxchg)(var, new, old); + __asm__ __volatile__( + "lock; dec" ETHR_AINT_SUFFIX__ " %0" + : "=m"(var->counter) + : "m"(var->counter) + : "memory"); } +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_MB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_RETURN_MB 1 +#endif + static ETHR_INLINE ETHR_AINT_T__ -ETHR_NATMC_FUNC__(cmpxchg_relb)(ETHR_ATMC_T__ *var, - ETHR_AINT_T__ new, - ETHR_AINT_T__ old) +ETHR_NATMC_FUNC__(add_return_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr) { - return ETHR_NATMC_FUNC__(cmpxchg)(var, new, old); + ETHR_AINT_T__ tmp; + + tmp = incr; + __asm__ __volatile__( + "lock; xadd" ETHR_AINT_SUFFIX__ " %0, %1" /* xadd didn't exist prior to the 486 */ + : "=r"(tmp) + : "m"(var->counter), "0"(tmp) + : "memory"); + /* now tmp is the atomic's previous value */ + return tmp + incr; } #endif /* ETHR_TRY_INLINE_FUNCS */ diff --git a/erts/include/internal/i386/ethr_dw_atomic.h b/erts/include/internal/i386/ethr_dw_atomic.h new file mode 100644 index 0000000000..9fb89bbe43 --- /dev/null +++ b/erts/include/internal/i386/ethr_dw_atomic.h @@ -0,0 +1,278 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2011. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * Description: Native double word atomics for x86/x86_64 + * Author: Rickard Green + */ + +#ifndef ETHR_X86_DW_ATOMIC_H__ +#define ETHR_X86_DW_ATOMIC_H__ + +#ifdef ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT + +#define ETHR_HAVE_NATIVE_DW_ATOMIC +#define ETHR_NATIVE_DW_ATOMIC_IMPL "ethread" + +/* + * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it will be used + * at runtime in order to determine if native or fallback implementation + * should be used. + */ +#define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \ + ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__ + +#if ETHR_SIZEOF_PTR == 4 +typedef volatile ethr_sint64_t * ethr_native_dw_ptr_t; +# define ETHR_DW_NATMC_ALIGN_MASK__ 0x7 +# define ETHR_DW_CMPXCHG_SFX__ "8b" +# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint64_t +#else +#ifdef ETHR_HAVE_INT128_T +# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint128_t +typedef volatile ethr_sint128_t * ethr_native_dw_ptr_t; +#else +typedef struct { + ethr_sint64_t sint64[2]; +} ethr_native_sint128_t__; +typedef volatile ethr_native_sint128_t__ * ethr_native_dw_ptr_t; +#endif +# define ETHR_DW_NATMC_ALIGN_MASK__ 0xf +# define ETHR_DW_CMPXCHG_SFX__ "16b" +#endif + +/* + * We need 16 byte aligned memory in 64-bit mode, and 8 byte aligned + * memory in 32-bit mode. 16 byte aligned malloc in 64-bit mode is + * not common, and at least some glibc malloc implementations + * only 4 byte align in 32-bit mode. + * + * This code assumes 8 byte aligned memory in 64-bit mode, and 4 byte + * aligned memory in 32-bit mode. A malloc implementation that does + * not adhere to these alignment requirements is seriously broken, + * and we wont bother trying to work around it. 
+ * + * Since memory alignment may be off by one word we need to align at + * runtime. We, therefore, need an extra word allocated. + */ +#define ETHR_DW_NATMC_MEM__(VAR) \ + (&var->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__]) +typedef union { +#ifdef ETHR_NATIVE_SU_DW_SINT_T + volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint; +#endif + volatile ethr_sint_t sint[3]; + volatile char c[ETHR_SIZEOF_PTR*3]; +} ethr_native_dw_atomic_t; + + +#if (defined(ETHR_TRY_INLINE_FUNCS) \ + || defined(ETHR_ATOMIC_IMPL__) \ + || defined(ETHR_X86_SSE2_ASM_C__)) \ + && ETHR_SIZEOF_PTR == 4 \ + && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT) +ethr_sint64_t +ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var); +void +ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var, + ethr_sint64_t val); +#endif + +#if (defined(ETHR_TRY_INLINE_FUNCS) \ + || defined(ETHR_ATOMIC_IMPL__) \ + || defined(ETHR_X86_SSE2_ASM_C__)) +# ifdef ETHR_DEBUG +# define ETHR_DW_DBG_ALIGNED__(PTR) \ + ETHR_ASSERT((((ethr_uint_t) (PTR)) & ETHR_DW_NATMC_ALIGN_MASK__) == 0); +# else +# define ETHR_DW_DBG_ALIGNED__(PTR) +# endif +#endif + +#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__) + +#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR +static ETHR_INLINE ethr_sint_t * +ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var) +{ + return (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var); +} + +#if ETHR_SIZEOF_PTR == 4 && defined(__PIC__) && __PIC__ +/* + * When position independent code is used in 32-bit mode, the EBX register + * is used for storage of global offset table address, and we may not + * use it as input or output in an asm. We need to save and restore the + * EBX register explicitly (for some reason gcc doesn't provide this + * service to us). + */ +# define ETHR_NO_CLOBBER_EBX__ 1 +#else +# define ETHR_NO_CLOBBER_EBX__ 0 +#endif + +#if ETHR_NO_CLOBBER_EBX__ && !defined(ETHR_CMPXCHG8B_REGISTER_SHORTAGE) +/* When no optimization is on, we'll run into a register shortage */ +# if defined(ETHR_DEBUG) || defined(DEBUG) || defined(VALGRIND) \ + || defined(GCOV) || defined(PURIFY) || defined(PURECOV) +# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 1 +# else +# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 0 +# endif +#endif + + +#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_CMPXCHG_MB + +static ETHR_INLINE int +ethr_native_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var, + ethr_sint_t *new, + ethr_sint_t *xchg) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + char xchgd; + + ETHR_DW_DBG_ALIGNED__(p); + + __asm__ __volatile__( +#if ETHR_NO_CLOBBER_EBX__ + "pushl %%ebx\n\t" +# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE + "movl (%7), %%ebx\n\t" + "movl 4(%7), %%ecx\n\t" +# else + "movl %8, %%ebx\n\t" +# endif +#endif + "lock; cmpxchg" ETHR_DW_CMPXCHG_SFX__ " %0\n\t" + "setz %3\n\t" +#if ETHR_NO_CLOBBER_EBX__ + "popl %%ebx\n\t" +#endif + : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd) + : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), +#if ETHR_NO_CLOBBER_EBX__ +# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE + "3"(new) +# else + "3"(new[1]), + "r"(new[0]) +# endif +#else + "3"(new[1]), + "b"(new[0]) +#endif + : "cc", "memory"); + + return (int) xchgd; +} + +#undef ETHR_NO_CLOBBER_EBX__ + +#if ETHR_SIZEOF_PTR == 4 && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT) + +typedef union { + ethr_sint64_t sint64; + ethr_sint_t sint[2]; +} ethr_dw_atomic_no_sse2_convert_t; + +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ + +static ETHR_INLINE ethr_sint64_t +ethr_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var) +{ 
+ if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__) + return ethr_sse2_native_su_dw_atomic_read(var); + else { + ethr_sint_t new[2]; + ethr_dw_atomic_no_sse2_convert_t xchg; + new[0] = new[1] = xchg.sint[0] = xchg.sint[1] = 0x83838383; + (void) ethr_native_dw_atomic_cmpxchg_mb(var, new, xchg.sint); + return xchg.sint64; + } +} + +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET + +static ETHR_INLINE void +ethr_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var, + ethr_sint64_t val) +{ + if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__) + ethr_sse2_native_su_dw_atomic_set(var, val); + else { + ethr_sint_t xchg[2] = {0, 0}; + ethr_dw_atomic_no_sse2_convert_t new; + new.sint64 = val; + while (!ethr_native_dw_atomic_cmpxchg_mb(var, new.sint, xchg)); + } +} + +#endif /* ETHR_SIZEOF_PTR == 4 */ + +#endif /* ETHR_TRY_INLINE_FUNCS */ + +#if defined(ETHR_X86_SSE2_ASM_C__) \ + && ETHR_SIZEOF_PTR == 4 \ + && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT) + +/* + * 8-byte aligned loads and stores of 64-bit values are atomic from + * pentium and forward. An ordinary volatile load or store in 32-bit + * mode generates two 32-bit operations (at least with gcc-4.1.2 using + * -msse2). In order to guarantee one 64-bit load/store operation + * from/to memory we load/store via an xmm register using movq. + * + * Load/store can be achieved using cmpxchg8b, however, using movq is + * much faster. Unfortunately we cannot do the same thing in 64-bit + * mode; instead, we have to do loads and stores via cmpxchg16b. + * + * We do not inline these, but instead compile these into a separate + * object file using -msse2. This since we don't want to use -msse2 for + * the whole system. If we detect sse2 support (pentium4 and forward) + * at runtime, we use them; otherwise, we fall back to using cmpxchg8b + * for loads and stores. This way the binary can be moved between + * processors with and without sse2 support. + */ + +ethr_sint64_t +ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + ethr_sint64_t val; + ETHR_DW_DBG_ALIGNED__(p); + __asm__ __volatile__("movq %1, %0\n\t" : "=x"(val) : "m"(*p) : "memory"); + return val; +} + +void +ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var, + ethr_sint64_t val) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + ETHR_DW_DBG_ALIGNED__(p); + __asm__ __volatile__("movq %1, %0\n\t" : "=m"(*p) : "x"(val) : "memory"); +} + +#endif /* ETHR_X86_SSE2_ASM_C__ */ + +#endif /* ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT */ + +#endif /* ETHR_X86_DW_ATOMIC_H__ */ + diff --git a/erts/include/internal/i386/ethr_membar.h b/erts/include/internal/i386/ethr_membar.h new file mode 100644 index 0000000000..92d9de7f3f --- /dev/null +++ b/erts/include/internal/i386/ethr_membar.h @@ -0,0 +1,114 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2011. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ + +/* + * Description: Memory barriers for x86/x86-64 + * Author: Rickard Green + */ + +#ifndef ETHR_X86_MEMBAR_H__ +#define ETHR_X86_MEMBAR_H__ + +#define ETHR_LoadLoad (1 << 0) +#define ETHR_LoadStore (1 << 1) +#define ETHR_StoreLoad (1 << 2) +#define ETHR_StoreStore (1 << 3) + +#define ETHR_NO_SSE2_MEMORY_BARRIER__ \ +do { \ + volatile ethr_sint32_t x__ = 0; \ + __asm__ __volatile__ ("lock; orl $0x0, %0\n\t" \ + : "=m"(x__) \ + : "m"(x__) \ + : "memory"); \ +} while (0) + +static __inline__ void +ethr_cfence__(void) +{ + __asm__ __volatile__ ("" : : : "memory"); +} + +static __inline__ void +ethr_mfence__(void) +{ +#if ETHR_SIZEOF_PTR == 4 + if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) + ETHR_NO_SSE2_MEMORY_BARRIER__; + else +#endif + __asm__ __volatile__ ("mfence\n\t" : : : "memory"); +} + +static __inline__ void +ethr_sfence__(void) +{ +#if ETHR_SIZEOF_PTR == 4 + if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) + ETHR_NO_SSE2_MEMORY_BARRIER__; + else +#endif + __asm__ __volatile__ ("sfence\n\t" : : : "memory"); +} + +static __inline__ void +ethr_lfence__(void) +{ +#if ETHR_SIZEOF_PTR == 4 + if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) + ETHR_NO_SSE2_MEMORY_BARRIER__; + else +#endif + __asm__ __volatile__ ("lfence\n\t" : : : "memory"); +} + +#define ETHR_X86_OUT_OF_ORDER_MEMBAR(B) \ + ETHR_CHOOSE_EXPR((B) == ETHR_StoreStore, \ + ethr_sfence__(), \ + ETHR_CHOOSE_EXPR((B) == ETHR_LoadLoad, \ + ethr_lfence__(), \ + ethr_mfence__())) + +#ifdef ETHR_X86_OUT_OF_ORDER + +#define ETHR_MEMBAR(B) \ + ETHR_X86_OUT_OF_ORDER_MEMBAR((B)) + +#else /* !ETHR_X86_OUT_OF_ORDER (the default) */ + +/* + * We assume that only stores before loads may be reordered. That is, + * we assume that *no* instructions like these are used: + * - CLFLUSH, + * - streaming stores executed with non-temporal move, + * - string operations, or + * - other instructions which aren't LoadLoad, LoadStore, and StoreStore + * ordered by themselves + * If such instructions are used, either insert memory barriers + * using ETHR_X86_OUT_OF_ORDER_MEMBAR() at appropriate places, or + * define ETHR_X86_OUT_OF_ORDER. For more info see Intel 64 and IA-32 + * Architectures Software Developer's Manual; Vol 3A; Chapter 8.2.2. + */ + +#define ETHR_MEMBAR(B) \ + ETHR_CHOOSE_EXPR((B) & ETHR_StoreLoad, ethr_mfence__(), ethr_cfence__()) + +#endif /* !ETHR_X86_OUT_OF_ORDER */ + +#endif /* ETHR_X86_MEMBAR_H__ */ diff --git a/erts/include/internal/i386/ethread.h b/erts/include/internal/i386/ethread.h index b5a17caefb..80e4dc7b99 100644 --- a/erts/include/internal/i386/ethread.h +++ b/erts/include/internal/i386/ethread.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2005-2010. All Rights Reserved. + * Copyright Ericsson AB 2005-2011. All Rights Reserved. 
* * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -24,17 +24,15 @@ #ifndef ETHREAD_I386_ETHREAD_H #define ETHREAD_I386_ETHREAD_H +#include "ethr_membar.h" #define ETHR_ATOMIC_WANT_32BIT_IMPL__ #include "atomic.h" #if ETHR_SIZEOF_PTR == 8 # define ETHR_ATOMIC_WANT_64BIT_IMPL__ # include "atomic.h" #endif +#include "ethr_dw_atomic.h" #include "spinlock.h" #include "rwlock.h" -#define ETHR_HAVE_NATIVE_ATOMICS 1 -#define ETHR_HAVE_NATIVE_SPINLOCKS 1 -#define ETHR_HAVE_NATIVE_RWSPINLOCKS 1 - #endif /* ETHREAD_I386_ETHREAD_H */ diff --git a/erts/include/internal/i386/rwlock.h b/erts/include/internal/i386/rwlock.h index be47f459ce..1a8cd7da0c 100644 --- a/erts/include/internal/i386/rwlock.h +++ b/erts/include/internal/i386/rwlock.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2005-2010. All Rights Reserved. + * Copyright Ericsson AB 2005-2011. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -26,6 +26,9 @@ #ifndef ETHREAD_I386_RWLOCK_H #define ETHREAD_I386_RWLOCK_H +#define ETHR_HAVE_NATIVE_RWSPINLOCKS 1 +#define ETHR_NATIVE_RWSPINLOCK_IMPL "ethread" + /* XXX: describe the algorithm */ typedef struct { volatile int lock; diff --git a/erts/include/internal/i386/spinlock.h b/erts/include/internal/i386/spinlock.h index 0325324895..a84fba91b1 100644 --- a/erts/include/internal/i386/spinlock.h +++ b/erts/include/internal/i386/spinlock.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2005-2010. All Rights Reserved. + * Copyright Ericsson AB 2005-2011. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -24,6 +24,9 @@ #ifndef ETHREAD_I386_SPINLOCK_H #define ETHREAD_I386_SPINLOCK_H +#define ETHR_HAVE_NATIVE_SPINLOCKS 1 +#define ETHR_NATIVE_SPINLOCK_IMPL "ethread" + /* A spinlock is the low byte of an aligned 32-bit integer. * A non-zero value means that the lock is locked. */ @@ -46,16 +49,20 @@ ethr_native_spin_unlock(ethr_native_spinlock_t *lock) * On i386 this needs to be a locked operation * to avoid Pentium Pro errata 66 and 92. */ -#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT) - __asm__ __volatile__("" : : : "memory"); - *(unsigned char*)&lock->lock = 0; -#else - char tmp = 0; - __asm__ __volatile__( - "xchgb %b0, %1" - : "=q"(tmp), "=m"(lock->lock) - : "0"(tmp) : "memory"); +#if !defined(__x86_64__) + if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) { + char tmp = 0; + __asm__ __volatile__( + "xchgb %b0, %1" + : "=q"(tmp), "=m"(lock->lock) + : "0"(tmp) : "memory"); + } + else #endif + { + ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore); + *(unsigned char*)&lock->lock = 0; + } } static ETHR_INLINE int |
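
The new ethr_membar.h in this patch models x86 ordering as "only stores may be reordered past later loads", so ETHR_MEMBAR emits mfence only when a StoreLoad constraint is requested and otherwise degrades to a compiler barrier. A minimal stand-alone sketch of that selection idea, with illustrative names rather than the ETHR_* ones and GCC-style inline asm assumed (the real header additionally routes through a locked instruction when SSE2 is absent):

    /* Sketch only: barrier-type bitmask mapped to mfence vs. compiler fence
     * on x86, under the default assumption that only StoreLoad reordering
     * can occur. Assumes a GCC/Clang toolchain on x86/x86_64. */
    #include <stdio.h>

    #define MB_LoadLoad   (1 << 0)
    #define MB_LoadStore  (1 << 1)
    #define MB_StoreLoad  (1 << 2)
    #define MB_StoreStore (1 << 3)

    static inline void cfence(void)   /* compiler barrier only */
    {
        __asm__ __volatile__("" : : : "memory");
    }

    static inline void hw_mfence(void)  /* full hardware barrier */
    {
        __asm__ __volatile__("mfence" : : : "memory");
    }

    /* Only a StoreLoad constraint needs a real fence on x86; the other
     * orderings already hold, so a compiler barrier suffices. */
    #define MEMBAR(B) ((B) & MB_StoreLoad ? hw_mfence() : cfence())

    int main(void)
    {
        MEMBAR(MB_StoreStore);              /* compiler barrier only */
        MEMBAR(MB_StoreLoad | MB_LoadLoad); /* emits mfence */
        puts("barriers issued");
        return 0;
    }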
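
As the comment in ethr_dw_atomic.h explains, each double-word atomic is over-allocated by one word and the actually used storage is aligned at runtime, because malloc may only guarantee word alignment. A sketch of that alignment trick under the same assumptions; the names here are illustrative, not the ETHR_DW_NATMC_MEM__ macro itself:

    /* Sketch only: over-allocate by one word and compute an aligned pointer
     * inside the buffer, so word-aligned allocations still yield a properly
     * aligned double word. */
    #include <stdint.h>
    #include <stdio.h>

    #define DW_ALIGN_MASK 0x7u   /* 8-byte alignment for a 64-bit double word
                                    in 32-bit mode (0xf for 16 bytes in 64-bit) */

    typedef union {
        volatile int64_t dw;
        volatile char c[sizeof(void *) * 3];  /* one extra word of slack */
    } dw_atomic_t;

    /* Point at the first suitably aligned byte inside the buffer. */
    static volatile int64_t *dw_mem(dw_atomic_t *var)
    {
        uintptr_t p = (uintptr_t) &var->c[0];
        uintptr_t aligned = (p + DW_ALIGN_MASK) & ~(uintptr_t) DW_ALIGN_MASK;
        return (volatile int64_t *) aligned;
    }

    int main(void)
    {
        dw_atomic_t a;
        volatile int64_t *p = dw_mem(&a);
        printf("aligned: %s\n",
               (((uintptr_t) p & DW_ALIGN_MASK) == 0) ? "yes" : "no");
        return 0;
    }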
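
On 32-bit x86 without SSE2, the new ethr_native_su_dw_atomic_read falls back to a compare-and-swap whose only purpose is to return an atomic 64-bit snapshot of memory. The same idea expressed with GCC's __sync builtin instead of the patch's hand-written cmpxchg8b, assuming a toolchain where the 8-byte builtin is available:

    /* Sketch only: atomic 64-bit read implemented as a compare-and-swap whose
     * "new" value equals the (arbitrary) expected value, so memory is never
     * changed but the returned old value is an atomic snapshot. */
    #include <stdint.h>
    #include <stdio.h>

    static int64_t atomic_read64(volatile int64_t *p)
    {
        /* If *p happens to equal the guess, the same value is written back;
         * otherwise nothing is stored. Either way the return value is what
         * was in memory at the time of the locked operation. */
        return __sync_val_compare_and_swap(p, (int64_t) 0, (int64_t) 0);
    }

    int main(void)
    {
        volatile int64_t v = 0x0123456789abcdefLL;
        printf("%llx\n", (unsigned long long) atomic_read64(&v));
        return 0;
    }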
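
The spinlock.h hunk keeps a locked xchg for unlock only on CPUs without SSE2 (to stay clear of the Pentium Pro errata mentioned in its comment) and otherwise uses a plain store preceded by a release-style barrier. A portable sketch of that lock/unlock shape using __atomic builtins rather than the patch's inline asm; names are illustrative:

    /* Sketch only: test-and-set spinlock with an acquire-ordered exchange on
     * lock and a release-ordered store on unlock. */
    #include <stdio.h>

    typedef struct { volatile int lock; } spinlock_t;

    static void spin_lock(spinlock_t *l)
    {
        while (__atomic_exchange_n(&l->lock, 1, __ATOMIC_ACQUIRE))
            ;  /* spin until the previous value was 0, i.e. we took the lock */
    }

    static void spin_unlock(spinlock_t *l)
    {
        __atomic_store_n(&l->lock, 0, __ATOMIC_RELEASE);  /* release store */
    }

    int main(void)
    {
        spinlock_t l = {0};
        spin_lock(&l);
        puts("in critical section");
        spin_unlock(&l);
        return 0;
    }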