aboutsummaryrefslogtreecommitdiffstats
path: root/erts/include/internal/i386
diff options
context:
space:
mode:
Diffstat (limited to 'erts/include/internal/i386')
-rw-r--r--erts/include/internal/i386/atomic.h334
-rw-r--r--erts/include/internal/i386/ethr_dw_atomic.h278
-rw-r--r--erts/include/internal/i386/ethr_membar.h114
-rw-r--r--erts/include/internal/i386/ethread.h8
-rw-r--r--erts/include/internal/i386/rwlock.h5
-rw-r--r--erts/include/internal/i386/spinlock.h27
6 files changed, 567 insertions, 199 deletions
diff --git a/erts/include/internal/i386/atomic.h b/erts/include/internal/i386/atomic.h
index 4e402f261a..fc1b619935 100644
--- a/erts/include/internal/i386/atomic.h
+++ b/erts/include/internal/i386/atomic.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -25,53 +25,42 @@
*/
#undef ETHR_INCLUDE_ATOMIC_IMPL__
-#if !defined(ETHR_X86_ATOMIC32_H__) && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__)
-#define ETHR_X86_ATOMIC32_H__
-#define ETHR_INCLUDE_ATOMIC_IMPL__ 4
-#undef ETHR_ATOMIC_WANT_32BIT_IMPL__
-#elif !defined(ETHR_X86_ATOMIC64_H__) && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__)
-#define ETHR_X86_ATOMIC64_H__
-#define ETHR_INCLUDE_ATOMIC_IMPL__ 8
-#undef ETHR_ATOMIC_WANT_64BIT_IMPL__
+#if !defined(ETHR_X86_ATOMIC32_H__) \
+ && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__)
+# define ETHR_X86_ATOMIC32_H__
+# define ETHR_INCLUDE_ATOMIC_IMPL__ 4
+# undef ETHR_ATOMIC_WANT_32BIT_IMPL__
+#elif !defined(ETHR_X86_ATOMIC64_H__) \
+ && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__)
+# define ETHR_X86_ATOMIC64_H__
+# define ETHR_INCLUDE_ATOMIC_IMPL__ 8
+# undef ETHR_ATOMIC_WANT_64BIT_IMPL__
#endif
#ifdef ETHR_INCLUDE_ATOMIC_IMPL__
-#ifndef ETHR_X86_ATOMIC_COMMON__
-#define ETHR_X86_ATOMIC_COMMON__
-
-#define ETHR_ATOMIC_HAVE_INC_DEC_INSTRUCTIONS 1
-
-#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT)
-#define ETHR_MEMORY_BARRIER __asm__ __volatile__("mfence" : : : "memory")
-#define ETHR_WRITE_MEMORY_BARRIER __asm__ __volatile__("sfence" : : : "memory")
-#define ETHR_READ_MEMORY_BARRIER __asm__ __volatile__("lfence" : : : "memory")
-#define ETHR_READ_DEPEND_MEMORY_BARRIER __asm__ __volatile__("" : : : "memory")
-#else
-#define ETHR_MEMORY_BARRIER \
-do { \
- volatile ethr_sint32_t x___ = 0; \
- __asm__ __volatile__("lock; incl %0" : "=m"(x___) : "m"(x___) : "memory"); \
-} while (0)
-#endif
-
-#endif /* ETHR_X86_ATOMIC_COMMON__ */
-
-#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
-#define ETHR_HAVE_NATIVE_ATOMIC32 1
-#define ETHR_NATMC_FUNC__(X) ethr_native_atomic32_ ## X
-#define ETHR_ATMC_T__ ethr_native_atomic32_t
-#define ETHR_AINT_T__ ethr_sint32_t
-#define ETHR_AINT_SUFFIX__ "l"
-#elif ETHR_INCLUDE_ATOMIC_IMPL__ == 8
-#define ETHR_HAVE_NATIVE_ATOMIC64 1
-#define ETHR_NATMC_FUNC__(X) ethr_native_atomic64_ ## X
-#define ETHR_ATMC_T__ ethr_native_atomic64_t
-#define ETHR_AINT_T__ ethr_sint64_t
-#define ETHR_AINT_SUFFIX__ "q"
-#else
-#error "Unsupported integer size"
-#endif
+# ifndef ETHR_X86_ATOMIC_COMMON__
+# define ETHR_X86_ATOMIC_COMMON__
+# define ETHR_ATOMIC_HAVE_INC_DEC_INSTRUCTIONS 1
+# endif /* ETHR_X86_ATOMIC_COMMON__ */
+
+# if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_NATIVE_ATOMIC32 1
+# define ETHR_NATIVE_ATOMIC32_IMPL "ethread"
+# define ETHR_NATMC_FUNC__(X) ethr_native_atomic32_ ## X
+# define ETHR_ATMC_T__ ethr_native_atomic32_t
+# define ETHR_AINT_T__ ethr_sint32_t
+# define ETHR_AINT_SUFFIX__ "l"
+# elif ETHR_INCLUDE_ATOMIC_IMPL__ == 8
+# define ETHR_HAVE_NATIVE_ATOMIC64 1
+# define ETHR_NATIVE_ATOMIC64_IMPL "ethread"
+# define ETHR_NATMC_FUNC__(X) ethr_native_atomic64_ ## X
+# define ETHR_ATMC_T__ ethr_native_atomic64_t
+# define ETHR_AINT_T__ ethr_sint64_t
+# define ETHR_AINT_SUFFIX__ "q"
+# else
+# error "Unsupported integer size"
+# endif
/* An atomic is an aligned ETHR_AINT_T__ accessed via locked operations.
*/
@@ -81,87 +70,28 @@ typedef struct {
#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADDR 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADDR 1
+#endif
+
static ETHR_INLINE ETHR_AINT_T__ *
ETHR_NATMC_FUNC__(addr)(ETHR_ATMC_T__ *var)
{
return (ETHR_AINT_T__ *) &var->counter;
}
-static ETHR_INLINE void
-ETHR_NATMC_FUNC__(init)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
-{
- var->counter = i;
-}
-
-static ETHR_INLINE void
-ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
-{
- var->counter = i;
-}
-
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var)
-{
- return var->counter;
-}
-
-static ETHR_INLINE void
-ETHR_NATMC_FUNC__(add)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
-{
- __asm__ __volatile__(
- "lock; add" ETHR_AINT_SUFFIX__ " %1, %0"
- : "=m"(var->counter)
- : "ir"(incr), "m"(var->counter));
-}
-
-static ETHR_INLINE void
-ETHR_NATMC_FUNC__(inc)(ETHR_ATMC_T__ *var)
-{
- __asm__ __volatile__(
- "lock; inc" ETHR_AINT_SUFFIX__ " %0"
- : "=m"(var->counter)
- : "m"(var->counter));
-}
-
-static ETHR_INLINE void
-ETHR_NATMC_FUNC__(dec)(ETHR_ATMC_T__ *var)
-{
- __asm__ __volatile__(
- "lock; dec" ETHR_AINT_SUFFIX__ " %0"
- : "=m"(var->counter)
- : "m"(var->counter));
-}
-
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(add_return)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
-{
- ETHR_AINT_T__ tmp;
-
- tmp = incr;
- __asm__ __volatile__(
- "lock; xadd" ETHR_AINT_SUFFIX__ " %0, %1" /* xadd didn't exist prior to the 486 */
- : "=r"(tmp)
- : "m"(var->counter), "0"(tmp));
- /* now tmp is the atomic's previous value */
- return tmp + incr;
-}
-
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(inc_return)(ETHR_ATMC_T__ *var)
-{
- return ETHR_NATMC_FUNC__(add_return)(var, (ETHR_AINT_T__) 1);
-}
-
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(dec_return)(ETHR_ATMC_T__ *var)
-{
- return ETHR_NATMC_FUNC__(add_return)(var, (ETHR_AINT_T__) -1);
-}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_CMPXCHG_MB 1
+#endif
static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(cmpxchg)(ETHR_ATMC_T__ *var,
- ETHR_AINT_T__ new,
- ETHR_AINT_T__ old)
+ETHR_NATMC_FUNC__(cmpxchg_mb)(ETHR_ATMC_T__ *var,
+ ETHR_AINT_T__ new,
+ ETHR_AINT_T__ old)
{
__asm__ __volatile__(
"lock; cmpxchg" ETHR_AINT_SUFFIX__ " %2, %3"
@@ -171,110 +101,148 @@ ETHR_NATMC_FUNC__(cmpxchg)(ETHR_ATMC_T__ *var,
return old;
}
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(and_retold)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
-{
- ETHR_AINT_T__ tmp, old;
-
- tmp = var->counter;
- do {
- old = tmp;
- tmp = ETHR_NATMC_FUNC__(cmpxchg)(var, tmp & mask, tmp);
- } while (__builtin_expect(tmp != old, 0));
- /* now tmp is the atomic's previous value */
- return tmp;
-}
-
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(or_retold)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
-{
- ETHR_AINT_T__ tmp, old;
-
- tmp = var->counter;
- do {
- old = tmp;
- tmp = ETHR_NATMC_FUNC__(cmpxchg)(var, tmp | mask, tmp);
- } while (__builtin_expect(tmp != old, 0));
- /* now tmp is the atomic's previous value */
- return tmp;
-}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_XCHG_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_XCHG_MB 1
+#endif
static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(xchg)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ val)
+ETHR_NATMC_FUNC__(xchg_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ val)
{
ETHR_AINT_T__ tmp = val;
__asm__ __volatile__(
"xchg" ETHR_AINT_SUFFIX__ " %0, %1"
: "=r"(tmp)
- : "m"(var->counter), "0"(tmp));
+ : "m"(var->counter), "0"(tmp)
+ : "memory");
/* now tmp is the atomic's previous value */
return tmp;
}
-/*
- * Atomic ops with at least specified barriers.
- */
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET 1
+#endif
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(read_acqb)(ETHR_ATMC_T__ *var)
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
{
- ETHR_AINT_T__ val;
-#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT)
- val = var->counter;
+ var->counter = i;
+}
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_RELB 1
#else
- val = ETHR_NATMC_FUNC__(add_return)(var, 0);
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_RELB 1
#endif
- __asm__ __volatile__("" : : : "memory");
- return val;
-}
static ETHR_INLINE void
ETHR_NATMC_FUNC__(set_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
{
- __asm__ __volatile__("" : : : "memory");
-#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT)
- var->counter = i;
+#if defined(_M_IX86)
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ (void) ETHR_NATMC_FUNC__(xchg_mb)(var, i);
+ else
+#endif /* _M_IX86 */
+ {
+ ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);
+ var->counter = i;
+ }
+}
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_MB 1
#else
- (void) ETHR_NATMC_FUNC__(xchg)(var, i);
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_MB 1
#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(set_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
+{
+ (void) ETHR_NATMC_FUNC__(xchg_mb)(var, i);
}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_READ 1
+#endif
+
static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(inc_return_acqb)(ETHR_ATMC_T__ *var)
+ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var)
{
- ETHR_AINT_T__ res = ETHR_NATMC_FUNC__(inc_return)(var);
- __asm__ __volatile__("" : : : "memory");
- return res;
+ return var->counter;
}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_MB 1
+#endif
+
static ETHR_INLINE void
-ETHR_NATMC_FUNC__(dec_relb)(ETHR_ATMC_T__ *var)
+ETHR_NATMC_FUNC__(add_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
{
- __asm__ __volatile__("" : : : "memory");
- ETHR_NATMC_FUNC__(dec)(var);
-}
+ __asm__ __volatile__(
+ "lock; add" ETHR_AINT_SUFFIX__ " %1, %0"
+ : "=m"(var->counter)
+ : "ir"(incr), "m"(var->counter)
+ : "memory");
+}
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(dec_return_relb)(ETHR_ATMC_T__ *var)
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_INC_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_INC_MB 1
+#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(inc_mb)(ETHR_ATMC_T__ *var)
{
- __asm__ __volatile__("" : : : "memory");
- return ETHR_NATMC_FUNC__(dec_return)(var);
+ __asm__ __volatile__(
+ "lock; inc" ETHR_AINT_SUFFIX__ " %0"
+ : "=m"(var->counter)
+ : "m"(var->counter)
+ : "memory");
}
-static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(cmpxchg_acqb)(ETHR_ATMC_T__ *var,
- ETHR_AINT_T__ new,
- ETHR_AINT_T__ old)
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_DEC_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_DEC_MB 1
+#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(dec_mb)(ETHR_ATMC_T__ *var)
{
- return ETHR_NATMC_FUNC__(cmpxchg)(var, new, old);
+ __asm__ __volatile__(
+ "lock; dec" ETHR_AINT_SUFFIX__ " %0"
+ : "=m"(var->counter)
+ : "m"(var->counter)
+ : "memory");
}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_RETURN_MB 1
+#endif
+
static ETHR_INLINE ETHR_AINT_T__
-ETHR_NATMC_FUNC__(cmpxchg_relb)(ETHR_ATMC_T__ *var,
- ETHR_AINT_T__ new,
- ETHR_AINT_T__ old)
+ETHR_NATMC_FUNC__(add_return_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
{
- return ETHR_NATMC_FUNC__(cmpxchg)(var, new, old);
+ ETHR_AINT_T__ tmp;
+
+ tmp = incr;
+ __asm__ __volatile__(
+ "lock; xadd" ETHR_AINT_SUFFIX__ " %0, %1" /* xadd didn't exist prior to the 486 */
+ : "=r"(tmp)
+ : "m"(var->counter), "0"(tmp)
+ : "memory");
+ /* now tmp is the atomic's previous value */
+ return tmp + incr;
}
#endif /* ETHR_TRY_INLINE_FUNCS */
diff --git a/erts/include/internal/i386/ethr_dw_atomic.h b/erts/include/internal/i386/ethr_dw_atomic.h
new file mode 100644
index 0000000000..9fb89bbe43
--- /dev/null
+++ b/erts/include/internal/i386/ethr_dw_atomic.h
@@ -0,0 +1,278 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Native double word atomics for x86/x86_64
+ * Author: Rickard Green
+ */
+
+#ifndef ETHR_X86_DW_ATOMIC_H__
+#define ETHR_X86_DW_ATOMIC_H__
+
+#ifdef ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT
+
+#define ETHR_HAVE_NATIVE_DW_ATOMIC
+#define ETHR_NATIVE_DW_ATOMIC_IMPL "ethread"
+
+/*
+ * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it will be used
+ * at runtime in order to determine if native or fallback implementation
+ * should be used.
+ */
+#define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__
+
+#if ETHR_SIZEOF_PTR == 4
+typedef volatile ethr_sint64_t * ethr_native_dw_ptr_t;
+# define ETHR_DW_NATMC_ALIGN_MASK__ 0x7
+# define ETHR_DW_CMPXCHG_SFX__ "8b"
+# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint64_t
+#else
+#ifdef ETHR_HAVE_INT128_T
+# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint128_t
+typedef volatile ethr_sint128_t * ethr_native_dw_ptr_t;
+#else
+typedef struct {
+ ethr_sint64_t sint64[2];
+} ethr_native_sint128_t__;
+typedef volatile ethr_native_sint128_t__ * ethr_native_dw_ptr_t;
+#endif
+# define ETHR_DW_NATMC_ALIGN_MASK__ 0xf
+# define ETHR_DW_CMPXCHG_SFX__ "16b"
+#endif
+
+/*
+ * We need 16 byte aligned memory in 64-bit mode, and 8 byte aligned
+ * memory in 32-bit mode. 16 byte aligned malloc in 64-bit mode is
+ * not common, and at least some glibc malloc implementations
+ * only 4 byte align in 32-bit mode.
+ *
+ * This code assumes 8 byte aligned memory in 64-bit mode, and 4 byte
+ * aligned memory in 32-bit mode. A malloc implementation that does
+ * not adhere to these alignment requirements is seriously broken,
+ * and we won't bother trying to work around it.
+ *
+ * Since memory alignment may be off by one word we need to align at
+ * runtime. We, therefore, need an extra word allocated.
+ */
+/*
+ * Address of the double word slot inside (VAR)->c. The low address bits
+ * of (VAR)->c[0] selected by ETHR_DW_NATMC_ALIGN_MASK__ are used as the
+ * byte index into c. Both uses of the argument are parenthesized so the
+ * macro works for any pointer expression, not only a variable that
+ * happens to be named `var`.
+ */
+#define ETHR_DW_NATMC_MEM__(VAR) \
+ (&(VAR)->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
+typedef union { /*
+ * Storage for a native double word atomic. Three words are reserved
+ * (sint[3] / c[ETHR_SIZEOF_PTR*3]); the double word value itself is
+ * accessed at the address computed by ETHR_DW_NATMC_MEM__().
+ */
+#ifdef ETHR_NATIVE_SU_DW_SINT_T
+ volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint; /* only when a single-unit double word integer type is defined */
+#endif
+ volatile ethr_sint_t sint[3]; /* word-sized view of the storage */
+ volatile char c[ETHR_SIZEOF_PTR*3]; /* byte view; indexed by ETHR_DW_NATMC_MEM__() */
+} ethr_native_dw_atomic_t;
+
+
+#if (defined(ETHR_TRY_INLINE_FUNCS) \
+ || defined(ETHR_ATOMIC_IMPL__) \
+ || defined(ETHR_X86_SSE2_ASM_C__)) \
+ && ETHR_SIZEOF_PTR == 4 \
+ && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+ethr_sint64_t
+ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var);
+void
+ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val);
+#endif
+
+#if (defined(ETHR_TRY_INLINE_FUNCS) \
+ || defined(ETHR_ATOMIC_IMPL__) \
+ || defined(ETHR_X86_SSE2_ASM_C__))
+# ifdef ETHR_DEBUG
+# define ETHR_DW_DBG_ALIGNED__(PTR) \
+ ETHR_ASSERT((((ethr_uint_t) (PTR)) & ETHR_DW_NATMC_ALIGN_MASK__) == 0);
+# else
+# define ETHR_DW_DBG_ALIGNED__(PTR)
+# endif
+#endif
+
+#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)
+
+#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR
+/*
+ * Return the address, as computed by ETHR_DW_NATMC_MEM__(), of the
+ * double word slot inside `var`.
+ */
+static ETHR_INLINE ethr_sint_t *
+ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var)
+{
+    volatile char *slot = ETHR_DW_NATMC_MEM__(var);
+
+    return (ethr_sint_t *) slot;
+}
+
+#if ETHR_SIZEOF_PTR == 4 && defined(__PIC__) && __PIC__
+/*
+ * When position independent code is used in 32-bit mode, the EBX register
+ * is used for storage of global offset table address, and we may not
+ * use it as input or output in an asm. We need to save and restore the
+ * EBX register explicitly (for some reason gcc doesn't provide this
+ * service to us).
+ */
+# define ETHR_NO_CLOBBER_EBX__ 1
+#else
+# define ETHR_NO_CLOBBER_EBX__ 0
+#endif
+
+#if ETHR_NO_CLOBBER_EBX__ && !defined(ETHR_CMPXCHG8B_REGISTER_SHORTAGE)
+/* When no optimization is on, we'll run into a register shortage */
+# if defined(ETHR_DEBUG) || defined(DEBUG) || defined(VALGRIND) \
+ || defined(GCOV) || defined(PURIFY) || defined(PURECOV)
+# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 1
+# else
+# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 0
+# endif
+#endif
+
+
+#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_CMPXCHG_MB
+
+static ETHR_INLINE int
+ethr_native_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var,
+ ethr_sint_t *new,
+ ethr_sint_t *xchg)
+{ /*
+ * Double word compare-and-exchange on the slot inside `var`, done with
+ * `lock; cmpxchg8b` (ETHR_SIZEOF_PTR == 4) or `lock; cmpxchg16b`.
+ *
+ * xchg[0]/xchg[1] are passed in the a/d registers as the value to
+ * compare against, and are overwritten with the contents of those
+ * registers after the instruction. new[0]/new[1] are supplied in the
+ * b/c registers as the replacement value.
+ *
+ * Returns the byte produced by `setz`, i.e. non-zero when the
+ * instruction left ZF set and 0 otherwise.
+ */
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ char xchgd;
+
+ ETHR_DW_DBG_ALIGNED__(p);
+
+ __asm__ __volatile__(
+#if ETHR_NO_CLOBBER_EBX__
+ "pushl %%ebx\n\t" /* EBX may not be used as an operand here: save it by hand */
+# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE
+ "movl (%7), %%ebx\n\t" /* %7 holds the `new` pointer (in ecx): new[0] -> ebx */
+ "movl 4(%7), %%ecx\n\t" /* new[1] -> ecx, replacing the pointer */
+# else
+ "movl %8, %%ebx\n\t" /* %8 is new[0] in a compiler-chosen register */
+# endif
+#endif
+ "lock; cmpxchg" ETHR_DW_CMPXCHG_SFX__ " %0\n\t"
+ "setz %3\n\t" /* xchgd = ZF */
+#if ETHR_NO_CLOBBER_EBX__
+ "popl %%ebx\n\t" /* restore the saved EBX */
+#endif
+ : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd)
+ : "m"(*p), "1"(xchg[1]), "2"(xchg[0]),
+#if ETHR_NO_CLOBBER_EBX__
+# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE
+ "3"(new)
+# else
+ "3"(new[1]),
+ "r"(new[0])
+# endif
+#else
+ "3"(new[1]),
+ "b"(new[0])
+#endif
+ : "cc", "memory");
+
+ return (int) xchgd;
+}
+
+#undef ETHR_NO_CLOBBER_EBX__
+
+#if ETHR_SIZEOF_PTR == 4 && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+
+typedef union { /* one 64-bit value viewed as the two-word array taken by ethr_native_dw_atomic_cmpxchg_mb() */
+ ethr_sint64_t sint64;
+ ethr_sint_t sint[2];
+} ethr_dw_atomic_no_sse2_convert_t;
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ
+
+/*
+ * Read the 64-bit value of `var` as a single unit.
+ *
+ * When the runtime configuration reports SSE2, the out-of-line
+ * ethr_sse2_native_su_dw_atomic_read() does the load. Otherwise the
+ * value is fetched with a double word cmpxchg whose compare value and
+ * replacement value are the same pattern: the stored value is left as
+ * it was, and the value found in memory comes back through `seen`.
+ */
+static ETHR_INLINE ethr_sint64_t
+ethr_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
+{
+    ethr_sint_t desired[2];
+    ethr_dw_atomic_no_sse2_convert_t seen;
+
+    if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
+        return ethr_sse2_native_su_dw_atomic_read(var);
+
+    seen.sint[1] = 0x83838383;
+    seen.sint[0] = seen.sint[1];
+    desired[1] = seen.sint[0];
+    desired[0] = desired[1];
+
+    /* The success flag is irrelevant; only the observed value matters. */
+    (void) ethr_native_dw_atomic_cmpxchg_mb(var, desired, seen.sint);
+    return seen.sint64;
+}
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET
+
+/*
+ * Store the 64-bit value `val` in `var` as a single unit.
+ *
+ * When the runtime configuration reports SSE2, the out-of-line
+ * ethr_sse2_native_su_dw_atomic_set() does the store. Otherwise a
+ * double word cmpxchg is retried until it succeeds; each failed attempt
+ * refreshes `expected` with the value currently in memory.
+ */
+static ETHR_INLINE void
+ethr_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+                             ethr_sint64_t val)
+{
+    ethr_sint_t expected[2] = {0, 0};
+    ethr_dw_atomic_no_sse2_convert_t desired;
+
+    if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__) {
+        ethr_sse2_native_su_dw_atomic_set(var, val);
+        return;
+    }
+
+    desired.sint64 = val;
+    for (;;) {
+        if (ethr_native_dw_atomic_cmpxchg_mb(var, desired.sint, expected))
+            break;
+    }
+}
+
+#endif /* ETHR_SIZEOF_PTR == 4 */
+
+#endif /* ETHR_TRY_INLINE_FUNCS */
+
+#if defined(ETHR_X86_SSE2_ASM_C__) \
+ && ETHR_SIZEOF_PTR == 4 \
+ && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+
+/*
+ * 8-byte aligned loads and stores of 64-bit values are atomic from
+ * pentium and forward. An ordinary volatile load or store in 32-bit
+ * mode generates two 32-bit operations (at least with gcc-4.1.2 using
+ * -msse2). In order to guarantee one 64-bit load/store operation
+ * from/to memory we load/store via an xmm register using movq.
+ *
+ * Load/store can be achieved using cmpxchg8b, however, using movq is
+ * much faster. Unfortunately we cannot do the same thing in 64-bit
+ * mode; instead, we have to do loads and stores via cmpxchg16b.
+ *
+ * We do not inline these, but instead compile these into a separate
+ * object file using -msse2. This is because we don't want to use -msse2 for
+ * the whole system. If we detect sse2 support (pentium4 and forward)
+ * at runtime, we use them; otherwise, we fall back to using cmpxchg8b
+ * for loads and stores. This way the binary can be moved between
+ * processors with and without sse2 support.
+ */
+
+ethr_sint64_t
+ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
+{ /*
+ * Load the 64-bit value held in the slot inside `var` with a single
+ * movq into an SSE register ("x" constraint) and return it.
+ */
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ ethr_sint64_t val;
+ ETHR_DW_DBG_ALIGNED__(p);
+ __asm__ __volatile__("movq %1, %0\n\t" : "=x"(val) : "m"(*p) : "memory");
+ return val;
+}
+
+void
+ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val)
+{ /*
+ * Store `val` into the slot inside `var` with a single movq from an
+ * SSE register ("x" constraint).
+ */
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ ETHR_DW_DBG_ALIGNED__(p);
+ __asm__ __volatile__("movq %1, %0\n\t" : "=m"(*p) : "x"(val) : "memory");
+}
+
+#endif /* ETHR_X86_SSE2_ASM_C__ */
+
+#endif /* ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT */
+
+#endif /* ETHR_X86_DW_ATOMIC_H__ */
+
diff --git a/erts/include/internal/i386/ethr_membar.h b/erts/include/internal/i386/ethr_membar.h
new file mode 100644
index 0000000000..92d9de7f3f
--- /dev/null
+++ b/erts/include/internal/i386/ethr_membar.h
@@ -0,0 +1,114 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Memory barriers for x86/x86-64
+ * Author: Rickard Green
+ */
+
+#ifndef ETHR_X86_MEMBAR_H__
+#define ETHR_X86_MEMBAR_H__
+
+#define ETHR_LoadLoad (1 << 0) /* barrier-kind bit flags; may be or-ed together and passed to ETHR_MEMBAR() */
+#define ETHR_LoadStore (1 << 1)
+#define ETHR_StoreLoad (1 << 2) /* the only kind that makes the default ETHR_MEMBAR() emit ethr_mfence__() */
+#define ETHR_StoreStore (1 << 3)
+
+#define ETHR_NO_SSE2_MEMORY_BARRIER__ /* fence fallback used by the ethr_*fence__() functions when SSE2 is reported missing */ \
+do { \
+ volatile ethr_sint32_t x__ = 0; /* local dummy operand */ \
+ __asm__ __volatile__ ("lock; orl $0x0, %0\n\t" /* locked or with 0: value unchanged */ \
+ : "=m"(x__) \
+ : "m"(x__) \
+ : "memory"); \
+} while (0)
+
+static __inline__ void
+ethr_cfence__(void)
+{ /* Compiler-only fence: an empty asm statement with a "memory" clobber. */
+ __asm__ __volatile__ ("" : : : "memory");
+}
+
+/*
+ * Issue an `mfence` instruction. In 32-bit builds, when the runtime
+ * configuration reports that SSE2 is missing,
+ * ETHR_NO_SSE2_MEMORY_BARRIER__ is used instead.
+ */
+static __inline__ void
+ethr_mfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+    if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) {
+        ETHR_NO_SSE2_MEMORY_BARRIER__;
+        return;
+    }
+#endif
+    __asm__ __volatile__ ("mfence\n\t" : : : "memory");
+}
+
+/*
+ * Issue an `sfence` instruction. In 32-bit builds, when the runtime
+ * configuration reports that SSE2 is missing,
+ * ETHR_NO_SSE2_MEMORY_BARRIER__ is used instead.
+ */
+static __inline__ void
+ethr_sfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+    if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) {
+        ETHR_NO_SSE2_MEMORY_BARRIER__;
+        return;
+    }
+#endif
+    __asm__ __volatile__ ("sfence\n\t" : : : "memory");
+}
+
+/*
+ * Issue an `lfence` instruction. In 32-bit builds, when the runtime
+ * configuration reports that SSE2 is missing,
+ * ETHR_NO_SSE2_MEMORY_BARRIER__ is used instead.
+ */
+static __inline__ void
+ethr_lfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+    if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) {
+        ETHR_NO_SSE2_MEMORY_BARRIER__;
+        return;
+    }
+#endif
+    __asm__ __volatile__ ("lfence\n\t" : : : "memory");
+}
+
+#define ETHR_X86_OUT_OF_ORDER_MEMBAR(B) /* exactly StoreStore -> sfence; exactly LoadLoad -> lfence; anything else -> mfence */ \
+ ETHR_CHOOSE_EXPR((B) == ETHR_StoreStore, \
+ ethr_sfence__(), \
+ ETHR_CHOOSE_EXPR((B) == ETHR_LoadLoad, \
+ ethr_lfence__(), \
+ ethr_mfence__()))
+
+#ifdef ETHR_X86_OUT_OF_ORDER
+
+#define ETHR_MEMBAR(B) \
+ ETHR_X86_OUT_OF_ORDER_MEMBAR((B))
+
+#else /* !ETHR_X86_OUT_OF_ORDER (the default) */
+
+/*
+ * We assume that only stores before loads may be reordered. That is,
+ * we assume that *no* instructions like these are used:
+ * - CLFLUSH,
+ * - streaming stores executed with non-temporal move,
+ * - string operations, or
+ * - other instructions which aren't LoadLoad, LoadStore, and StoreStore
+ * ordered by themselves
+ * If such instructions are used, either insert memory barriers
+ * using ETHR_X86_OUT_OF_ORDER_MEMBAR() at appropriate places, or
+ * define ETHR_X86_OUT_OF_ORDER. For more info see Intel 64 and IA-32
+ * Architectures Software Developer's Manual; Vol 3A; Chapter 8.2.2.
+ *
+ * Consequently, only a barrier that includes ETHR_StoreLoad expands to
+ * ethr_mfence__(); every other barrier expands to the compiler-only
+ * fence ethr_cfence__().
+ */
+
+#define ETHR_MEMBAR(B) \
+ ETHR_CHOOSE_EXPR((B) & ETHR_StoreLoad, ethr_mfence__(), ethr_cfence__())
+
+#endif /* !ETHR_X86_OUT_OF_ORDER */
+
+#endif /* ETHR_X86_MEMBAR_H__ */
diff --git a/erts/include/internal/i386/ethread.h b/erts/include/internal/i386/ethread.h
index b5a17caefb..80e4dc7b99 100644
--- a/erts/include/internal/i386/ethread.h
+++ b/erts/include/internal/i386/ethread.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -24,17 +24,15 @@
#ifndef ETHREAD_I386_ETHREAD_H
#define ETHREAD_I386_ETHREAD_H
+#include "ethr_membar.h"
#define ETHR_ATOMIC_WANT_32BIT_IMPL__
#include "atomic.h"
#if ETHR_SIZEOF_PTR == 8
# define ETHR_ATOMIC_WANT_64BIT_IMPL__
# include "atomic.h"
#endif
+#include "ethr_dw_atomic.h"
#include "spinlock.h"
#include "rwlock.h"
-#define ETHR_HAVE_NATIVE_ATOMICS 1
-#define ETHR_HAVE_NATIVE_SPINLOCKS 1
-#define ETHR_HAVE_NATIVE_RWSPINLOCKS 1
-
#endif /* ETHREAD_I386_ETHREAD_H */
diff --git a/erts/include/internal/i386/rwlock.h b/erts/include/internal/i386/rwlock.h
index be47f459ce..1a8cd7da0c 100644
--- a/erts/include/internal/i386/rwlock.h
+++ b/erts/include/internal/i386/rwlock.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -26,6 +26,9 @@
#ifndef ETHREAD_I386_RWLOCK_H
#define ETHREAD_I386_RWLOCK_H
+#define ETHR_HAVE_NATIVE_RWSPINLOCKS 1
+#define ETHR_NATIVE_RWSPINLOCK_IMPL "ethread"
+
/* XXX: describe the algorithm */
typedef struct {
volatile int lock;
diff --git a/erts/include/internal/i386/spinlock.h b/erts/include/internal/i386/spinlock.h
index 0325324895..a84fba91b1 100644
--- a/erts/include/internal/i386/spinlock.h
+++ b/erts/include/internal/i386/spinlock.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -24,6 +24,9 @@
#ifndef ETHREAD_I386_SPINLOCK_H
#define ETHREAD_I386_SPINLOCK_H
+#define ETHR_HAVE_NATIVE_SPINLOCKS 1
+#define ETHR_NATIVE_SPINLOCK_IMPL "ethread"
+
/* A spinlock is the low byte of an aligned 32-bit integer.
* A non-zero value means that the lock is locked.
*/
@@ -46,16 +49,20 @@ ethr_native_spin_unlock(ethr_native_spinlock_t *lock)
* On i386 this needs to be a locked operation
* to avoid Pentium Pro errata 66 and 92.
*/
-#if defined(__x86_64__) || !defined(ETHR_PRE_PENTIUM4_COMPAT)
- __asm__ __volatile__("" : : : "memory");
- *(unsigned char*)&lock->lock = 0;
-#else
- char tmp = 0;
- __asm__ __volatile__(
- "xchgb %b0, %1"
- : "=q"(tmp), "=m"(lock->lock)
- : "0"(tmp) : "memory");
+#if !defined(__x86_64__)
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) {
+ char tmp = 0;
+ __asm__ __volatile__(
+ "xchgb %b0, %1"
+ : "=q"(tmp), "=m"(lock->lock)
+ : "0"(tmp) : "memory");
+ }
+ else
#endif
+ {
+ ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);
+ *(unsigned char*)&lock->lock = 0;
+ }
}
static ETHR_INLINE int