author     Rickard Green <[email protected]>    2011-01-02 10:03:54 +0100
committer  Rickard Green <[email protected]>    2011-06-14 11:40:19 +0200
commit     7f19af0423934f85c74ccb75546e5e3a6b6d10e8 (patch)
tree       612d1010f37517f813a94d8a5f38cfd0126ce3f8 /erts/include/internal/i386
parent     4a5a75811e2cd590b5c94f71864a5245fd511ccf (diff)
Improve ethread atomics
The ethread atomics API now also provides double word size atomics.
Double word size atomics are implemented using native atomic
instructions on x86 (when the cmpxchg8b instruction is available) and
on x86_64 (when the cmpxchg16b instruction is available). On other
hardware where 32-bit atomics or word size atomics are available, an
optimized fallback is used; otherwise, a spinlock- or mutex-based
fallback is used.

The ethread library now performs runtime tests for the presence of
hardware features, such as SSE2 instructions, instead of requiring
this to be determined at compile time.

There are now functions implementing each atomic operation with the
following implied memory barrier semantics: none, read, write,
acquire, release, and full. Some of the operation-barrier combinations
aren't especially useful, but instead of filtering some out, and
potentially missing a useful one, we implement them all.

A much smaller set of functionality for native atomics is required to
be implemented than before. More or less only cmpxchg and a membar
macro need to be implemented for each atomic size; other functions are
automatically constructed from these. It is, of course, often wise to
implement more than this when possible, from a performance
perspective.
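To make the last point concrete, here is a minimal, illustrative C
sketch (not the actual ethread fallback code; the type and function
names are invented for the example) of how a derived operation such as
an add-return with full barrier can be built from nothing more than a
cmpxchg that implies a full barrier:

    #include <stdint.h>

    typedef int32_t ai32_t;

    /* Assumed native primitive: compare-and-swap with full memory
     * barrier, returning the previous contents of *p. Emulated here
     * with a GCC/Clang builtin purely for the sketch. */
    static ai32_t cas_mb(volatile ai32_t *p, ai32_t new_val, ai32_t exp)
    {
        return __sync_val_compare_and_swap(p, exp, new_val);
    }

    /* Derived operation: atomically add incr and return the new value,
     * in the spirit of how the framework synthesizes the remaining
     * operations from cmpxchg alone. */
    static ai32_t add_return_mb(volatile ai32_t *p, ai32_t incr)
    {
        ai32_t exp, act = *p;
        do {
            exp = act;                        /* value we believe is stored */
            act = cas_mb(p, exp + incr, exp); /* try to install exp + incr  */
        } while (act != exp);                 /* someone raced us; retry    */
        return exp + incr;
    }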
Diffstat (limited to 'erts/include/internal/i386')
-rw-r--r--  erts/include/internal/i386/atomic.h          334
-rw-r--r--  erts/include/internal/i386/ethr_dw_atomic.h  278
-rw-r--r--  erts/include/internal/i386/ethr_membar.h     114
-rw-r--r--  erts/include/internal/i386/ethread.h           8
-rw-r--r--  erts/include/internal/i386/rwlock.h            5
-rw-r--r--  erts/include/internal/i386/spinlock.h         27
6 files changed, 567 insertions(+), 199 deletions(-)
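The new erts/include/internal/i386/ethr_dw_atomic.h listed above also
falls back to the double word cmpxchg itself for 64-bit loads and
stores on 32-bit x86 without SSE2. A plain-C sketch of that idea
(using GCC's __sync builtin instead of the diff's inline assembly,
with invented function names; on i386 this requires compiling for a
CPU that has cmpxchg8b):

    #include <stdint.h>

    /* Atomic 64-bit read: a compare-and-swap whose new value equals its
     * compare value can never change the stored value, yet it always
     * returns the current contents as one atomic 64-bit quantity. */
    static int64_t dw_read_via_cas(volatile int64_t *p)
    {
        return __sync_val_compare_and_swap(p, (int64_t)0, (int64_t)0);
    }

    /* Atomic 64-bit store: retry the compare-and-swap until our value
     * has been installed in a single atomic step. */
    static void dw_set_via_cas(volatile int64_t *p, int64_t val)
    {
        int64_t seen = *p;
        for (;;) {
            int64_t prev = __sync_val_compare_and_swap(p, seen, val);
            if (prev == seen)
                break;
            seen = prev;
        }
    }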
diff --git a/erts/include/internal/i386/atomic.h b/erts/include/internal/i386/atomic.h
index 4e402f261a..fc1b619935 100644
--- a/erts/include/internal/i386/atomic.h
+++ b/erts/include/internal/i386/atomic.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -25,53 +25,42 @@
*/
#undef ETHR_INCLUDE_ATOMIC_IMPL__
-#if !defined(ETHR_X86_ATOMIC32_H__) && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__)
-#define ETHR_X86_ATOMIC32_H__
-#define ETHR_INCLUDE_ATOMIC_IMPL__ 4
-#undef ETHR_ATOMIC_WANT_32BIT_IMPL__
-#elif !defined(ETHR_X86_ATOMIC64_H__) && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__)
-#define ETHR_X86_ATOMIC64_H__
-#define ETHR_INCLUDE_ATOMIC_IMPL__ 8
-#undef ETHR_ATOMIC_WANT_64BIT_IMPL__
+#if !defined(ETHR_X86_ATOMIC32_H__) \
+ && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__)
+# define ETHR_X86_ATOMIC32_H__
+# define ETHR_INCLUDE_ATOMIC_IMPL__ 4
+# undef ETHR_ATOMIC_WANT_32BIT_IMPL__
+#elif !defined(ETHR_X86_ATOMIC64_H__) \
+ && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__)
+# define ETHR_X86_ATOMIC64_H__
+# define ETHR_INCLUDE_ATOMIC_IMPL__ 8
+# undef ETHR_ATOMIC_WANT_64BIT_IMPL__
#endif
#ifdef ETHR_INCLUDE_ATOMIC_IMPL__
-#ifndef ETHR_X86_ATOMIC_COMMON__
-#define ETHR_X86_ATOMIC_COMMON__
-
-#define ETHR_ATOMIC_HAVE_INC_DEC_INSTRUCTIONS 1
-
-#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT)
-#define ETHR_MEMORY_BARRIER __asm__ __volatile__("mfence" : : : "memory")
-#define ETHR_WRITE_MEMORY_BARRIER __asm__ __volatile__("sfence" : : : "memory")
-#define ETHR_READ_MEMORY_BARRIER __asm__ __volatile__("lfence" : : : "memory")
-#define ETHR_READ_DEPEND_MEMORY_BARRIER __asm__ __volatile__("" : : : "memory")
-#else
-#define ETHR_MEMORY_BARRIER \
-do { \
- volatile ethr_sint32_t x___ = 0; \
- __asm__ __volatile__("lock; incl %0" : "=m"(x___) : "m"(x___) : "memory"); \
-} while (0)
-#endif
-
-#endif /* ETHR_X86_ATOMIC_COMMON__ */
-
-#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
-#define ETHR_HAVE_NATIVE_ATOMIC32 1
-#define ETHR_NATMC_FUNC__(X) ethr_native_atomic32_ ## X
-#define ETHR_ATMC_T__ ethr_native_atomic32_t
-#define ETHR_AINT_T__ ethr_sint32_t
-#define ETHR_AINT_SUFFIX__ "l"
-#elif ETHR_INCLUDE_ATOMIC_IMPL__ == 8
-#define ETHR_HAVE_NATIVE_ATOMIC64 1
-#define ETHR_NATMC_FUNC__(X) ethr_native_atomic64_ ## X
-#define ETHR_ATMC_T__ ethr_native_atomic64_t
-#define ETHR_AINT_T__ ethr_sint64_t
-#define ETHR_AINT_SUFFIX__ "q"
-#else
-#error "Unsupported integer size"
-#endif
+# ifndef ETHR_X86_ATOMIC_COMMON__
+# define ETHR_X86_ATOMIC_COMMON__
+# define ETHR_ATOMIC_HAVE_INC_DEC_INSTRUCTIONS 1
+# endif /* ETHR_X86_ATOMIC_COMMON__ */
+
+# if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_NATIVE_ATOMIC32 1
+# define ETHR_NATIVE_ATOMIC32_IMPL "ethread"
+# define ETHR_NATMC_FUNC__(X) ethr_native_atomic32_ ## X
+# define ETHR_ATMC_T__ ethr_native_atomic32_t
+# define ETHR_AINT_T__ ethr_sint32_t
+# define ETHR_AINT_SUFFIX__ "l"
+# elif ETHR_INCLUDE_ATOMIC_IMPL__ == 8
+# define ETHR_HAVE_NATIVE_ATOMIC64 1
+# define ETHR_NATIVE_ATOMIC64_IMPL "ethread"
+# define ETHR_NATMC_FUNC__(X) ethr_native_atomic64_ ## X
+# define ETHR_ATMC_T__ ethr_native_atomic64_t
+# define ETHR_AINT_T__ ethr_sint64_t
+# define ETHR_AINT_SUFFIX__ "q"
+# else
+# error "Unsupported integer size"
+# endif
/* An atomic is an aligned ETHR_AINT_T__ accessed via locked operations.
*/
@@ -81,87 +70,28 @@ typedef struct {
#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADDR 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADDR 1
+#endif
+
static ETHR_INLINE ETHR_AINT_T__ *
ETHR_NATMC_FUNC__(addr)(ETHR_ATMC_T__ *var)
{
return (ETHR_AINT_T__ *) &var->counter;
}
-static ETHR_INLINE void
-ETHR_NATMC_FUNC__(init)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
-{
- var->counter = i;
-}
-
-static ETHR_INLINE void
-ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
-{
- var->counter = i;
-}
-
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var)
-{
- return var->counter;
-}
-
-static ETHR_INLINE void
-ETHR_NATMC_FUNC__(add)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
-{
- __asm__ __volatile__(
- "lock; add" ETHR_AINT_SUFFIX__ " %1, %0"
- : "=m"(var->counter)
- : "ir"(incr), "m"(var->counter));
-}
-
-static ETHR_INLINE void
-ETHR_NATMC_FUNC__(inc)(ETHR_ATMC_T__ *var)
-{
- __asm__ __volatile__(
- "lock; inc" ETHR_AINT_SUFFIX__ " %0"
- : "=m"(var->counter)
- : "m"(var->counter));
-}
-
-static ETHR_INLINE void
-ETHR_NATMC_FUNC__(dec)(ETHR_ATMC_T__ *var)
-{
- __asm__ __volatile__(
- "lock; dec" ETHR_AINT_SUFFIX__ " %0"
- : "=m"(var->counter)
- : "m"(var->counter));
-}
-
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(add_return)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
-{
- ETHR_AINT_T__ tmp;
-
- tmp = incr;
- __asm__ __volatile__(
- "lock; xadd" ETHR_AINT_SUFFIX__ " %0, %1" /* xadd didn't exist prior to the 486 */
- : "=r"(tmp)
- : "m"(var->counter), "0"(tmp));
- /* now tmp is the atomic's previous value */
- return tmp + incr;
-}
-
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(inc_return)(ETHR_ATMC_T__ *var)
-{
- return ETHR_NATMC_FUNC__(add_return)(var, (ETHR_AINT_T__) 1);
-}
-
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(dec_return)(ETHR_ATMC_T__ *var)
-{
- return ETHR_NATMC_FUNC__(add_return)(var, (ETHR_AINT_T__) -1);
-}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_CMPXCHG_MB 1
+#endif
static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(cmpxchg)(ETHR_ATMC_T__ *var,
- ETHR_AINT_T__ new,
- ETHR_AINT_T__ old)
+ETHR_NATMC_FUNC__(cmpxchg_mb)(ETHR_ATMC_T__ *var,
+ ETHR_AINT_T__ new,
+ ETHR_AINT_T__ old)
{
__asm__ __volatile__(
"lock; cmpxchg" ETHR_AINT_SUFFIX__ " %2, %3"
@@ -171,110 +101,148 @@ ETHR_NATMC_FUNC__(cmpxchg)(ETHR_ATMC_T__ *var,
return old;
}
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(and_retold)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
-{
- ETHR_AINT_T__ tmp, old;
-
- tmp = var->counter;
- do {
- old = tmp;
- tmp = ETHR_NATMC_FUNC__(cmpxchg)(var, tmp & mask, tmp);
- } while (__builtin_expect(tmp != old, 0));
- /* now tmp is the atomic's previous value */
- return tmp;
-}
-
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(or_retold)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
-{
- ETHR_AINT_T__ tmp, old;
-
- tmp = var->counter;
- do {
- old = tmp;
- tmp = ETHR_NATMC_FUNC__(cmpxchg)(var, tmp | mask, tmp);
- } while (__builtin_expect(tmp != old, 0));
- /* now tmp is the atomic's previous value */
- return tmp;
-}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_XCHG_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_XCHG_MB 1
+#endif
static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(xchg)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ val)
+ETHR_NATMC_FUNC__(xchg_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ val)
{
ETHR_AINT_T__ tmp = val;
__asm__ __volatile__(
"xchg" ETHR_AINT_SUFFIX__ " %0, %1"
: "=r"(tmp)
- : "m"(var->counter), "0"(tmp));
+ : "m"(var->counter), "0"(tmp)
+ : "memory");
/* now tmp is the atomic's previous value */
return tmp;
}
-/*
- * Atomic ops with at least specified barriers.
- */
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET 1
+#endif
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(read_acqb)(ETHR_ATMC_T__ *var)
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
{
- ETHR_AINT_T__ val;
-#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT)
- val = var->counter;
+ var->counter = i;
+}
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_RELB 1
#else
- val = ETHR_NATMC_FUNC__(add_return)(var, 0);
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_RELB 1
#endif
- __asm__ __volatile__("" : : : "memory");
- return val;
-}
static ETHR_INLINE void
ETHR_NATMC_FUNC__(set_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
{
- __asm__ __volatile__("" : : : "memory");
-#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT)
- var->counter = i;
+#if defined(_M_IX86)
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ (void) ETHR_NATMC_FUNC__(xchg_mb)(var, i);
+ else
+#endif /* _M_IX86 */
+ {
+ ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);
+ var->counter = i;
+ }
+}
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_MB 1
#else
- (void) ETHR_NATMC_FUNC__(xchg)(var, i);
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_MB 1
#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(set_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
+{
+ (void) ETHR_NATMC_FUNC__(xchg_mb)(var, i);
}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_READ 1
+#endif
+
static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(inc_return_acqb)(ETHR_ATMC_T__ *var)
+ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var)
{
- ETHR_AINT_T__ res = ETHR_NATMC_FUNC__(inc_return)(var);
- __asm__ __volatile__("" : : : "memory");
- return res;
+ return var->counter;
}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_MB 1
+#endif
+
static ETHR_INLINE void
-ETHR_NATMC_FUNC__(dec_relb)(ETHR_ATMC_T__ *var)
+ETHR_NATMC_FUNC__(add_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
{
- __asm__ __volatile__("" : : : "memory");
- ETHR_NATMC_FUNC__(dec)(var);
-}
+ __asm__ __volatile__(
+ "lock; add" ETHR_AINT_SUFFIX__ " %1, %0"
+ : "=m"(var->counter)
+ : "ir"(incr), "m"(var->counter)
+ : "memory");
+}
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(dec_return_relb)(ETHR_ATMC_T__ *var)
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_INC_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_INC_MB 1
+#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(inc_mb)(ETHR_ATMC_T__ *var)
{
- __asm__ __volatile__("" : : : "memory");
- return ETHR_NATMC_FUNC__(dec_return)(var);
+ __asm__ __volatile__(
+ "lock; inc" ETHR_AINT_SUFFIX__ " %0"
+ : "=m"(var->counter)
+ : "m"(var->counter)
+ : "memory");
}
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(cmpxchg_acqb)(ETHR_ATMC_T__ *var,
- ETHR_AINT_T__ new,
- ETHR_AINT_T__ old)
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_DEC_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_DEC_MB 1
+#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(dec_mb)(ETHR_ATMC_T__ *var)
{
- return ETHR_NATMC_FUNC__(cmpxchg)(var, new, old);
+ __asm__ __volatile__(
+ "lock; dec" ETHR_AINT_SUFFIX__ " %0"
+ : "=m"(var->counter)
+ : "m"(var->counter)
+ : "memory");
}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_RETURN_MB 1
+#endif
+
static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(cmpxchg_relb)(ETHR_ATMC_T__ *var,
- ETHR_AINT_T__ new,
- ETHR_AINT_T__ old)
+ETHR_NATMC_FUNC__(add_return_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
{
- return ETHR_NATMC_FUNC__(cmpxchg)(var, new, old);
+ ETHR_AINT_T__ tmp;
+
+ tmp = incr;
+ __asm__ __volatile__(
+ "lock; xadd" ETHR_AINT_SUFFIX__ " %0, %1" /* xadd didn't exist prior to the 486 */
+ : "=r"(tmp)
+ : "m"(var->counter), "0"(tmp)
+ : "memory");
+ /* now tmp is the atomic's previous value */
+ return tmp + incr;
}
#endif /* ETHR_TRY_INLINE_FUNCS */
diff --git a/erts/include/internal/i386/ethr_dw_atomic.h b/erts/include/internal/i386/ethr_dw_atomic.h
new file mode 100644
index 0000000000..9fb89bbe43
--- /dev/null
+++ b/erts/include/internal/i386/ethr_dw_atomic.h
@@ -0,0 +1,278 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Native double word atomics for x86/x86_64
+ * Author: Rickard Green
+ */
+
+#ifndef ETHR_X86_DW_ATOMIC_H__
+#define ETHR_X86_DW_ATOMIC_H__
+
+#ifdef ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT
+
+#define ETHR_HAVE_NATIVE_DW_ATOMIC
+#define ETHR_NATIVE_DW_ATOMIC_IMPL "ethread"
+
+/*
+ * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it will be used
+ * at runtime in order to determine if native or fallback implementation
+ * should be used.
+ */
+#define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__
+
+#if ETHR_SIZEOF_PTR == 4
+typedef volatile ethr_sint64_t * ethr_native_dw_ptr_t;
+# define ETHR_DW_NATMC_ALIGN_MASK__ 0x7
+# define ETHR_DW_CMPXCHG_SFX__ "8b"
+# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint64_t
+#else
+#ifdef ETHR_HAVE_INT128_T
+# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint128_t
+typedef volatile ethr_sint128_t * ethr_native_dw_ptr_t;
+#else
+typedef struct {
+ ethr_sint64_t sint64[2];
+} ethr_native_sint128_t__;
+typedef volatile ethr_native_sint128_t__ * ethr_native_dw_ptr_t;
+#endif
+# define ETHR_DW_NATMC_ALIGN_MASK__ 0xf
+# define ETHR_DW_CMPXCHG_SFX__ "16b"
+#endif
+
+/*
+ * We need 16 byte aligned memory in 64-bit mode, and 8 byte aligned
+ * memory in 32-bit mode. 16 byte aligned malloc in 64-bit mode is
+ * not common, and at least some glibc malloc implementations
+ * only 4 byte align in 32-bit mode.
+ *
+ * This code assumes 8 byte aligned memory in 64-bit mode, and 4 byte
+ * aligned memory in 32-bit mode. A malloc implementation that does
+ * not adhere to these alignment requirements is seriously broken,
+ * and we wont bother trying to work around it.
+ *
+ * Since memory alignment may be off by one word we need to align at
+ * runtime. We, therefore, need an extra word allocated.
+ */
+#define ETHR_DW_NATMC_MEM__(VAR) \
+ (&var->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
+typedef union {
+#ifdef ETHR_NATIVE_SU_DW_SINT_T
+ volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint;
+#endif
+ volatile ethr_sint_t sint[3];
+ volatile char c[ETHR_SIZEOF_PTR*3];
+} ethr_native_dw_atomic_t;
+
+
+#if (defined(ETHR_TRY_INLINE_FUNCS) \
+ || defined(ETHR_ATOMIC_IMPL__) \
+ || defined(ETHR_X86_SSE2_ASM_C__)) \
+ && ETHR_SIZEOF_PTR == 4 \
+ && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+ethr_sint64_t
+ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var);
+void
+ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val);
+#endif
+
+#if (defined(ETHR_TRY_INLINE_FUNCS) \
+ || defined(ETHR_ATOMIC_IMPL__) \
+ || defined(ETHR_X86_SSE2_ASM_C__))
+# ifdef ETHR_DEBUG
+# define ETHR_DW_DBG_ALIGNED__(PTR) \
+ ETHR_ASSERT((((ethr_uint_t) (PTR)) & ETHR_DW_NATMC_ALIGN_MASK__) == 0);
+# else
+# define ETHR_DW_DBG_ALIGNED__(PTR)
+# endif
+#endif
+
+#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)
+
+#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR
+static ETHR_INLINE ethr_sint_t *
+ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var)
+{
+ return (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var);
+}
+
+#if ETHR_SIZEOF_PTR == 4 && defined(__PIC__) && __PIC__
+/*
+ * When position independent code is used in 32-bit mode, the EBX register
+ * is used for storage of global offset table address, and we may not
+ * use it as input or output in an asm. We need to save and restore the
+ * EBX register explicitly (for some reason gcc doesn't provide this
+ * service to us).
+ */
+# define ETHR_NO_CLOBBER_EBX__ 1
+#else
+# define ETHR_NO_CLOBBER_EBX__ 0
+#endif
+
+#if ETHR_NO_CLOBBER_EBX__ && !defined(ETHR_CMPXCHG8B_REGISTER_SHORTAGE)
+/* When no optimization is on, we'll run into a register shortage */
+# if defined(ETHR_DEBUG) || defined(DEBUG) || defined(VALGRIND) \
+ || defined(GCOV) || defined(PURIFY) || defined(PURECOV)
+# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 1
+# else
+# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 0
+# endif
+#endif
+
+
+#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_CMPXCHG_MB
+
+static ETHR_INLINE int
+ethr_native_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var,
+ ethr_sint_t *new,
+ ethr_sint_t *xchg)
+{
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ char xchgd;
+
+ ETHR_DW_DBG_ALIGNED__(p);
+
+ __asm__ __volatile__(
+#if ETHR_NO_CLOBBER_EBX__
+ "pushl %%ebx\n\t"
+# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE
+ "movl (%7), %%ebx\n\t"
+ "movl 4(%7), %%ecx\n\t"
+# else
+ "movl %8, %%ebx\n\t"
+# endif
+#endif
+ "lock; cmpxchg" ETHR_DW_CMPXCHG_SFX__ " %0\n\t"
+ "setz %3\n\t"
+#if ETHR_NO_CLOBBER_EBX__
+ "popl %%ebx\n\t"
+#endif
+ : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd)
+ : "m"(*p), "1"(xchg[1]), "2"(xchg[0]),
+#if ETHR_NO_CLOBBER_EBX__
+# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE
+ "3"(new)
+# else
+ "3"(new[1]),
+ "r"(new[0])
+# endif
+#else
+ "3"(new[1]),
+ "b"(new[0])
+#endif
+ : "cc", "memory");
+
+ return (int) xchgd;
+}
+
+#undef ETHR_NO_CLOBBER_EBX__
+
+#if ETHR_SIZEOF_PTR == 4 && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+
+typedef union {
+ ethr_sint64_t sint64;
+ ethr_sint_t sint[2];
+} ethr_dw_atomic_no_sse2_convert_t;
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ
+
+static ETHR_INLINE ethr_sint64_t
+ethr_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
+{
+ if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
+ return ethr_sse2_native_su_dw_atomic_read(var);
+ else {
+ ethr_sint_t new[2];
+ ethr_dw_atomic_no_sse2_convert_t xchg;
+ new[0] = new[1] = xchg.sint[0] = xchg.sint[1] = 0x83838383;
+ (void) ethr_native_dw_atomic_cmpxchg_mb(var, new, xchg.sint);
+ return xchg.sint64;
+ }
+}
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET
+
+static ETHR_INLINE void
+ethr_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val)
+{
+ if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
+ ethr_sse2_native_su_dw_atomic_set(var, val);
+ else {
+ ethr_sint_t xchg[2] = {0, 0};
+ ethr_dw_atomic_no_sse2_convert_t new;
+ new.sint64 = val;
+ while (!ethr_native_dw_atomic_cmpxchg_mb(var, new.sint, xchg));
+ }
+}
+
+#endif /* ETHR_SIZEOF_PTR == 4 */
+
+#endif /* ETHR_TRY_INLINE_FUNCS */
+
+#if defined(ETHR_X86_SSE2_ASM_C__) \
+ && ETHR_SIZEOF_PTR == 4 \
+ && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+
+/*
+ * 8-byte aligned loads and stores of 64-bit values are atomic from
+ * pentium and forward. An ordinary volatile load or store in 32-bit
+ * mode generates two 32-bit operations (at least with gcc-4.1.2 using
+ * -msse2). In order to guarantee one 64-bit load/store operation
+ * from/to memory we load/store via an xmm register using movq.
+ *
+ * Load/store can be achieved using cmpxchg8b, however, using movq is
+ * much faster. Unfortunately we cannot do the same thing in 64-bit
+ * mode; instead, we have to do loads and stores via cmpxchg16b.
+ *
+ * We do not inline these, but instead compile these into a separate
+ * object file using -msse2. This since we don't want to use -msse2 for
+ * the whole system. If we detect sse2 support (pentium4 and forward)
+ * at runtime, we use them; otherwise, we fall back to using cmpxchg8b
+ * for loads and stores. This way the binary can be moved between
+ * processors with and without sse2 support.
+ */
+
+ethr_sint64_t
+ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
+{
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ ethr_sint64_t val;
+ ETHR_DW_DBG_ALIGNED__(p);
+ __asm__ __volatile__("movq %1, %0\n\t" : "=x"(val) : "m"(*p) : "memory");
+ return val;
+}
+
+void
+ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val)
+{
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ ETHR_DW_DBG_ALIGNED__(p);
+ __asm__ __volatile__("movq %1, %0\n\t" : "=m"(*p) : "x"(val) : "memory");
+}
+
+#endif /* ETHR_X86_SSE2_ASM_C__ */
+
+#endif /* ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT */
+
+#endif /* ETHR_X86_DW_ATOMIC_H__ */
+
diff --git a/erts/include/internal/i386/ethr_membar.h b/erts/include/internal/i386/ethr_membar.h
new file mode 100644
index 0000000000..92d9de7f3f
--- /dev/null
+++ b/erts/include/internal/i386/ethr_membar.h
@@ -0,0 +1,114 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Memory barriers for x86/x86-64
+ * Author: Rickard Green
+ */
+
+#ifndef ETHR_X86_MEMBAR_H__
+#define ETHR_X86_MEMBAR_H__
+
+#define ETHR_LoadLoad (1 << 0)
+#define ETHR_LoadStore (1 << 1)
+#define ETHR_StoreLoad (1 << 2)
+#define ETHR_StoreStore (1 << 3)
+
+#define ETHR_NO_SSE2_MEMORY_BARRIER__ \
+do { \
+ volatile ethr_sint32_t x__ = 0; \
+ __asm__ __volatile__ ("lock; orl $0x0, %0\n\t" \
+ : "=m"(x__) \
+ : "m"(x__) \
+ : "memory"); \
+} while (0)
+
+static __inline__ void
+ethr_cfence__(void)
+{
+ __asm__ __volatile__ ("" : : : "memory");
+}
+
+static __inline__ void
+ethr_mfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ ETHR_NO_SSE2_MEMORY_BARRIER__;
+ else
+#endif
+ __asm__ __volatile__ ("mfence\n\t" : : : "memory");
+}
+
+static __inline__ void
+ethr_sfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ ETHR_NO_SSE2_MEMORY_BARRIER__;
+ else
+#endif
+ __asm__ __volatile__ ("sfence\n\t" : : : "memory");
+}
+
+static __inline__ void
+ethr_lfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ ETHR_NO_SSE2_MEMORY_BARRIER__;
+ else
+#endif
+ __asm__ __volatile__ ("lfence\n\t" : : : "memory");
+}
+
+#define ETHR_X86_OUT_OF_ORDER_MEMBAR(B) \
+ ETHR_CHOOSE_EXPR((B) == ETHR_StoreStore, \
+ ethr_sfence__(), \
+ ETHR_CHOOSE_EXPR((B) == ETHR_LoadLoad, \
+ ethr_lfence__(), \
+ ethr_mfence__()))
+
+#ifdef ETHR_X86_OUT_OF_ORDER
+
+#define ETHR_MEMBAR(B) \
+ ETHR_X86_OUT_OF_ORDER_MEMBAR((B))
+
+#else /* !ETHR_X86_OUT_OF_ORDER (the default) */
+
+/*
+ * We assume that only stores before loads may be reordered. That is,
+ * we assume that *no* instructions like these are used:
+ * - CLFLUSH,
+ * - streaming stores executed with non-temporal move,
+ * - string operations, or
+ * - other instructions which aren't LoadLoad, LoadStore, and StoreStore
+ * ordered by themselves
+ * If such instructions are used, either insert memory barriers
+ * using ETHR_X86_OUT_OF_ORDER_MEMBAR() at appropriate places, or
+ * define ETHR_X86_OUT_OF_ORDER. For more info see Intel 64 and IA-32
+ * Architectures Software Developer's Manual; Vol 3A; Chapter 8.2.2.
+ */
+
+#define ETHR_MEMBAR(B) \
+ ETHR_CHOOSE_EXPR((B) & ETHR_StoreLoad, ethr_mfence__(), ethr_cfence__())
+
+#endif /* !ETHR_X86_OUT_OF_ORDER */
+
+#endif /* ETHR_X86_MEMBAR_H__ */
diff --git a/erts/include/internal/i386/ethread.h b/erts/include/internal/i386/ethread.h
index b5a17caefb..80e4dc7b99 100644
--- a/erts/include/internal/i386/ethread.h
+++ b/erts/include/internal/i386/ethread.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -24,17 +24,15 @@
#ifndef ETHREAD_I386_ETHREAD_H
#define ETHREAD_I386_ETHREAD_H
+#include "ethr_membar.h"
#define ETHR_ATOMIC_WANT_32BIT_IMPL__
#include "atomic.h"
#if ETHR_SIZEOF_PTR == 8
# define ETHR_ATOMIC_WANT_64BIT_IMPL__
# include "atomic.h"
#endif
+#include "ethr_dw_atomic.h"
#include "spinlock.h"
#include "rwlock.h"
-#define ETHR_HAVE_NATIVE_ATOMICS 1
-#define ETHR_HAVE_NATIVE_SPINLOCKS 1
-#define ETHR_HAVE_NATIVE_RWSPINLOCKS 1
-
#endif /* ETHREAD_I386_ETHREAD_H */
diff --git a/erts/include/internal/i386/rwlock.h b/erts/include/internal/i386/rwlock.h
index be47f459ce..1a8cd7da0c 100644
--- a/erts/include/internal/i386/rwlock.h
+++ b/erts/include/internal/i386/rwlock.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -26,6 +26,9 @@
#ifndef ETHREAD_I386_RWLOCK_H
#define ETHREAD_I386_RWLOCK_H
+#define ETHR_HAVE_NATIVE_RWSPINLOCKS 1
+#define ETHR_NATIVE_RWSPINLOCK_IMPL "ethread"
+
/* XXX: describe the algorithm */
typedef struct {
volatile int lock;
diff --git a/erts/include/internal/i386/spinlock.h b/erts/include/internal/i386/spinlock.h
index 0325324895..a84fba91b1 100644
--- a/erts/include/internal/i386/spinlock.h
+++ b/erts/include/internal/i386/spinlock.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -24,6 +24,9 @@
#ifndef ETHREAD_I386_SPINLOCK_H
#define ETHREAD_I386_SPINLOCK_H
+#define ETHR_HAVE_NATIVE_SPINLOCKS 1
+#define ETHR_NATIVE_SPINLOCK_IMPL "ethread"
+
/* A spinlock is the low byte of an aligned 32-bit integer.
* A non-zero value means that the lock is locked.
*/
@@ -46,16 +49,20 @@ ethr_native_spin_unlock(ethr_native_spinlock_t *lock)
* On i386 this needs to be a locked operation
* to avoid Pentium Pro errata 66 and 92.
*/
-#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT)
- __asm__ __volatile__("" : : : "memory");
- *(unsigned char*)&lock->lock = 0;
-#else
- char tmp = 0;
- __asm__ __volatile__(
- "xchgb %b0, %1"
- : "=q"(tmp), "=m"(lock->lock)
- : "0"(tmp) : "memory");
+#if !defined(__x86_64__)
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) {
+ char tmp = 0;
+ __asm__ __volatile__(
+ "xchgb %b0, %1"
+ : "=q"(tmp), "=m"(lock->lock)
+ : "0"(tmp) : "memory");
+ }
+ else
#endif
+ {
+ ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);
+ *(unsigned char*)&lock->lock = 0;
+ }
}
static ETHR_INLINE int