diff options
author | Rickard Green <[email protected]> | 2011-01-02 10:03:54 +0100 |
---|---|---|
committer | Rickard Green <[email protected]> | 2011-06-14 11:40:19 +0200 |
commit | 7f19af0423934f85c74ccb75546e5e3a6b6d10e8 (patch) | |
tree | 612d1010f37517f813a94d8a5f38cfd0126ce3f8 /erts/include/internal/win/ethr_dw_atomic.h | |
parent | 4a5a75811e2cd590b5c94f71864a5245fd511ccf (diff) | |
download | otp-7f19af0423934f85c74ccb75546e5e3a6b6d10e8.tar.gz otp-7f19af0423934f85c74ccb75546e5e3a6b6d10e8.tar.bz2 otp-7f19af0423934f85c74ccb75546e5e3a6b6d10e8.zip |
Improve ethread atomics
The ethread atomics API now also provides double word size atomics.
Double word size atomics are implemented using native atomic
instructions on x86 (when the cmpxchg8b instruction is available)
and on x86_64 (when the cmpxchg16b instruction is available). On
other hardware where 32-bit atomics or word size atomics are
available, an optimized fallback is used; otherwise, a spinlock,
or a mutex based fallback is used.
The ethread library now performs runtime tests for presence of
hardware features, such as for example SSE2 instructions, instead
of requiring this to be determined at compile time.
There are now functions implementing each atomic operation with the
following implied memory barrier semantics: none, read, write,
acquire, release, and full. Some of the operation-barrier
combinations aren't especially useful. But instead of filtering
useful ones out, and potentially missing a useful one, we implement
them all.
A much smaller set of functionality for native atomics is required
to be implemented than before. More or less only cmpxchg and a
membar macro are required to be implemented for each atomic size.
Other functions will automatically be constructed from these. It is,
of course, often wise to implement more than this if possible from a
performance perspective.
Diffstat (limited to 'erts/include/internal/win/ethr_dw_atomic.h')
-rw-r--r-- | erts/include/internal/win/ethr_dw_atomic.h | 154 |
1 files changed, 154 insertions, 0 deletions
/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2011. All Rights Reserved.
 *
 * The contents of this file are subject to the Erlang Public License,
 * Version 1.1, (the "License"); you may not use this file except in
 * compliance with the License. You should have received a copy of the
 * Erlang Public License along with this software. If not, it can be
 * retrieved online at http://www.erlang.org/.
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * %CopyrightEnd%
 */

/*
 * Description: Native double word atomics for windows
 * Author: Rickard Green
 */

/*
 * ETHR_INCLUDE_DW_ATOMIC_IMPL__ is (re)computed on every inclusion: it is
 * only defined the first time this header is seen, and only when the
 * interlocked compare-exchange intrinsic matching the pointer size has
 * been detected.
 */
#undef ETHR_INCLUDE_DW_ATOMIC_IMPL__
#ifndef ETHR_X86_DW_ATOMIC_H__
#  define ETHR_X86_DW_ATOMIC_H__
#  if ((ETHR_SIZEOF_PTR == 4 \
        && defined(ETHR_HAVE__INTERLOCKEDCOMPAREEXCHANGE64)) \
       || (ETHR_SIZEOF_PTR == 8 \
           && defined(ETHR_HAVE__INTERLOCKEDCOMPAREEXCHANGE128)))
#    define ETHR_INCLUDE_DW_ATOMIC_IMPL__
#  endif
#endif

#ifdef ETHR_INCLUDE_DW_ATOMIC_IMPL__

#  if ETHR_SIZEOF_PTR == 4
#    define ETHR_HAVE_NATIVE_SU_DW_ATOMIC
#  else
#    define ETHR_HAVE_NATIVE_DW_ATOMIC
#  endif
#  define ETHR_NATIVE_DW_ATOMIC_IMPL "windows-interlocked"

#  if defined(_M_IX86) || defined(_M_AMD64)
/*
 * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it will be used
 * at runtime in order to determine if native or fallback implementation
 * should be used.
 */
#    define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \
       ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__
#  endif

#  include <intrin.h>
#  if ETHR_SIZEOF_PTR == 4
#    pragma intrinsic(_InterlockedCompareExchange64)
#    define ETHR_DW_NATMC_ALIGN_MASK__ 0x7
#    define ETHR_NATIVE_SU_DW_SINT_T ethr_sint64_t
#  else
#    pragma intrinsic(_InterlockedCompareExchange128)
#    define ETHR_DW_NATMC_ALIGN_MASK__ 0xf
#  endif

/* Pointer type handed to the interlocked intrinsics as destination. */
typedef volatile __int64 * ethr_native_dw_ptr_t;

/*
 * We need 16 byte aligned memory in 64-bit mode, and 8 byte aligned
 * memory in 32-bit mode. 16 byte aligned malloc in 64-bit mode is
 * not common, and at least some glibc malloc implementations
 * only 4 byte align in 32-bit mode.
 *
 * This code assumes 8 byte aligned memory in 64-bit mode, and 4 byte
 * aligned memory in 32-bit mode. A malloc implementation that does
 * not adhere to these alignment requirements is seriously broken,
 * and we won't bother trying to work around it.
 *
 * Since memory alignment may be off by one word we need to align at
 * runtime. We, therefore, need an extra word allocated.
 */

/*
 * ETHR_DW_NATMC_MEM__(VAR) yields the address of the double word inside
 * the ethr_native_dw_atomic_t that VAR points to.
 *
 * The low bits of the address of VAR->c[0] (selected by
 * ETHR_DW_NATMC_ALIGN_MASK__) are used as a byte offset into VAR->c: the
 * offset is 0 when the storage is already aligned, and one word when it is
 * off by one word, which moves the result up to the next aligned address.
 *
 * The macro must refer to its parameter, not to an identifier that happens
 * to be in scope at the expansion site, so that it works regardless of what
 * the caller names its variable. VAR is evaluated twice; do not pass an
 * expression with side effects. The offset is masked before it is narrowed
 * to int so that the narrowing never sees an out-of-range value.
 */
#define ETHR_DW_NATMC_MEM__(VAR) \
   (&(VAR)->c[(int) (((ethr_uint_t) &(VAR)->c[0]) \
                     & ETHR_DW_NATMC_ALIGN_MASK__)])

/*
 * Storage for one native double word atomic.
 *
 * Three words are reserved: two for the value itself plus the extra word
 * needed for the runtime alignment described above. Always access the value
 * through ETHR_DW_NATMC_MEM__() / ethr_native_dw_atomic_addr(), never
 * through the members directly.
 */
typedef union {
#ifdef ETHR_NATIVE_SU_DW_SINT_T
    volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint;
#endif
    volatile ethr_sint_t sint[3];
    volatile char c[ETHR_SIZEOF_PTR*3];
} ethr_native_dw_atomic_t;

#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)

/*
 * Debug-build check that PTR satisfies the alignment required by the
 * intrinsic in use. Expands to nothing when ETHR_DEBUG is not defined.
 */
#ifdef ETHR_DEBUG
#  define ETHR_DW_DBG_ALIGNED__(PTR) \
     ETHR_ASSERT((((ethr_uint_t) (PTR)) & ETHR_DW_NATMC_ALIGN_MASK__) == 0);
#else
#  define ETHR_DW_DBG_ALIGNED__(PTR)
#endif

#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR

/*
 * Return the address of the (runtime aligned) double word stored in 'var',
 * viewed as an array of ethr_sint_t words.
 */
static ETHR_INLINE ethr_sint_t *
ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var)
{
    ethr_sint_t *p = (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var);
    ETHR_DW_DBG_ALIGNED__(p);
    return p;
}


#if ETHR_SIZEOF_PTR == 4

#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_CMPXCHG_MB

/*
 * 32-bit mode: compare-and-exchange on the double word seen as one 64-bit
 * integer, implemented with _InterlockedCompareExchange64().
 *
 *   var - atomic to operate on
 *   new - value to store if the current value equals 'exp'
 *   exp - expected current value
 *
 * Returns whatever the intrinsic returns, which according to its
 * documentation is the value found in the destination before the
 * operation; the exchange took place iff that value equals 'exp'.
 *
 * NOTE(review): the '_mb' suffix presumably stands for full memory barrier
 * semantics -- confirm against the ethread atomics API.
 */
static ETHR_INLINE ethr_sint64_t
ethr_native_su_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var,
                                    ethr_sint64_t new,
                                    ethr_sint64_t exp)
{
    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
    ETHR_DW_DBG_ALIGNED__(p);
    return (ethr_sint64_t) _InterlockedCompareExchange64(p, new, exp);
}

#elif ETHR_SIZEOF_PTR == 8

#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_CMPXCHG_MB

/* Index of the low/high word in a two word ethr_sint_t array. */
#ifdef ETHR_BIGENDIAN
#  define ETHR_WIN_LOW_WORD__ 1
#  define ETHR_WIN_HIGH_WORD__ 0
#else
#  define ETHR_WIN_LOW_WORD__ 0
#  define ETHR_WIN_HIGH_WORD__ 1
#endif

/*
 * 64-bit mode: compare-and-exchange on the double word seen as two words,
 * implemented with _InterlockedCompareExchange128().
 *
 *   var  - atomic to operate on
 *   new  - two words holding the value to store; read only
 *   xchg - two words passed to the intrinsic as its in/out comparand.
 *          According to the intrinsic's documentation it holds the expected
 *          value on entry and is overwritten with the value found in the
 *          destination.
 *
 * Returns the intrinsic's result converted to int; per its documentation
 * that is 1 when the exchange took place and 0 otherwise.
 *
 * NOTE(review): the '_mb' suffix presumably stands for full memory barrier
 * semantics -- confirm against the ethread atomics API.
 */
static ETHR_INLINE int
ethr_native_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var,
                                 ethr_sint_t *new,
                                 ethr_sint_t *xchg)
{
    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
    ETHR_DW_DBG_ALIGNED__(p);
    return (int) _InterlockedCompareExchange128(p,
                                                new[ETHR_WIN_HIGH_WORD__],
                                                new[ETHR_WIN_LOW_WORD__],
                                                xchg);
}

#endif

#endif /* ETHR_TRY_INLINE_FUNCS */

#endif /* ETHR_INCLUDE_DW_ATOMIC_IMPL__ */