aboutsummaryrefslogtreecommitdiffstats
path: root/erts/include/internal/i386
diff options
context:
space:
mode:
Diffstat (limited to 'erts/include/internal/i386')
-rw-r--r--erts/include/internal/i386/atomic.h278
-rw-r--r--erts/include/internal/i386/ethr_dw_atomic.h278
-rw-r--r--erts/include/internal/i386/ethr_membar.h114
-rw-r--r--erts/include/internal/i386/ethread.h12
-rw-r--r--erts/include/internal/i386/rwlock.h15
-rw-r--r--erts/include/internal/i386/spinlock.h37
6 files changed, 620 insertions, 114 deletions
diff --git a/erts/include/internal/i386/atomic.h b/erts/include/internal/i386/atomic.h
index 3291ad38e5..fc1b619935 100644
--- a/erts/include/internal/i386/atomic.h
+++ b/erts/include/internal/i386/atomic.h
@@ -1,19 +1,19 @@
/*
* %CopyrightBegin%
- *
- * Copyright Ericsson AB 2005-2009. All Rights Reserved.
- *
+ *
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
+ *
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
* compliance with the License. You should have received a copy of the
* Erlang Public License along with this software. If not, it can be
* retrieved online at http://www.erlang.org/.
- *
+ *
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
- *
+ *
* %CopyrightEnd%
*/
@@ -23,133 +23,233 @@
*
* This code requires a 486 or newer processor.
*/
-#ifndef ETHREAD_I386_ATOMIC_H
-#define ETHREAD_I386_ATOMIC_H
-/* An atomic is an aligned long accessed via locked operations.
+#undef ETHR_INCLUDE_ATOMIC_IMPL__
+#if !defined(ETHR_X86_ATOMIC32_H__) \
+ && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__)
+# define ETHR_X86_ATOMIC32_H__
+# define ETHR_INCLUDE_ATOMIC_IMPL__ 4
+# undef ETHR_ATOMIC_WANT_32BIT_IMPL__
+#elif !defined(ETHR_X86_ATOMIC64_H__) \
+ && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__)
+# define ETHR_X86_ATOMIC64_H__
+# define ETHR_INCLUDE_ATOMIC_IMPL__ 8
+# undef ETHR_ATOMIC_WANT_64BIT_IMPL__
+#endif
+
+#ifdef ETHR_INCLUDE_ATOMIC_IMPL__
+
+# ifndef ETHR_X86_ATOMIC_COMMON__
+# define ETHR_X86_ATOMIC_COMMON__
+# define ETHR_ATOMIC_HAVE_INC_DEC_INSTRUCTIONS 1
+# endif /* ETHR_X86_ATOMIC_COMMON__ */
+
+# if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_NATIVE_ATOMIC32 1
+# define ETHR_NATIVE_ATOMIC32_IMPL "ethread"
+# define ETHR_NATMC_FUNC__(X) ethr_native_atomic32_ ## X
+# define ETHR_ATMC_T__ ethr_native_atomic32_t
+# define ETHR_AINT_T__ ethr_sint32_t
+# define ETHR_AINT_SUFFIX__ "l"
+# elif ETHR_INCLUDE_ATOMIC_IMPL__ == 8
+# define ETHR_HAVE_NATIVE_ATOMIC64 1
+# define ETHR_NATIVE_ATOMIC64_IMPL "ethread"
+# define ETHR_NATMC_FUNC__(X) ethr_native_atomic64_ ## X
+# define ETHR_ATMC_T__ ethr_native_atomic64_t
+# define ETHR_AINT_T__ ethr_sint64_t
+# define ETHR_AINT_SUFFIX__ "q"
+# else
+# error "Unsupported integer size"
+# endif
+
+/* An atomic is an aligned ETHR_AINT_T__ accessed via locked operations.
*/
typedef struct {
- volatile long counter;
-} ethr_native_atomic_t;
+ volatile ETHR_AINT_T__ counter;
+} ETHR_ATMC_T__;
+
+#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADDR 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADDR 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__ *
+ETHR_NATMC_FUNC__(addr)(ETHR_ATMC_T__ *var)
+{
+ return (ETHR_AINT_T__ *) &var->counter;
+}
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_CMPXCHG_MB 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(cmpxchg_mb)(ETHR_ATMC_T__ *var,
+ ETHR_AINT_T__ new,
+ ETHR_AINT_T__ old)
+{
+ __asm__ __volatile__(
+ "lock; cmpxchg" ETHR_AINT_SUFFIX__ " %2, %3"
+ : "=a"(old), "=m"(var->counter)
+ : "r"(new), "m"(var->counter), "0"(old)
+ : "cc", "memory"); /* full memory clobber to make this a compiler barrier */
+ return old;
+}
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_XCHG_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_XCHG_MB 1
+#endif
-#ifdef ETHR_TRY_INLINE_FUNCS
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(xchg_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ val)
+{
+ ETHR_AINT_T__ tmp = val;
+ __asm__ __volatile__(
+ "xchg" ETHR_AINT_SUFFIX__ " %0, %1"
+ : "=r"(tmp)
+ : "m"(var->counter), "0"(tmp)
+ : "memory");
+ /* now tmp is the atomic's previous value */
+ return tmp;
+}
-#ifdef __x86_64__
-#define LONG_SUFFIX "q"
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET 1
#else
-#define LONG_SUFFIX "l"
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET 1
#endif
static ETHR_INLINE void
-ethr_native_atomic_init(ethr_native_atomic_t *var, long i)
+ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
{
var->counter = i;
}
-#define ethr_native_atomic_set(v, i) ethr_native_atomic_init((v), (i))
-static ETHR_INLINE long
-ethr_native_atomic_read(ethr_native_atomic_t *var)
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_RELB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_RELB 1
+#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(set_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
+{
+#if defined(_M_IX86)
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ (void) ETHR_NATMC_FUNC__(xchg_mb)(var, i);
+ else
+#endif /* _M_IX86 */
+ {
+ ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);
+ var->counter = i;
+ }
+}
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_MB 1
+#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(set_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ i)
+{
+ (void) ETHR_NATMC_FUNC__(xchg_mb)(var, i);
+}
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_READ 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var)
{
return var->counter;
}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_MB 1
+#endif
+
static ETHR_INLINE void
-ethr_native_atomic_add(ethr_native_atomic_t *var, long incr)
+ETHR_NATMC_FUNC__(add_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
{
__asm__ __volatile__(
- "lock; add" LONG_SUFFIX " %1, %0"
+ "lock; add" ETHR_AINT_SUFFIX__ " %1, %0"
: "=m"(var->counter)
- : "ir"(incr), "m"(var->counter));
+ : "ir"(incr), "m"(var->counter)
+ : "memory");
}
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_INC_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_INC_MB 1
+#endif
static ETHR_INLINE void
-ethr_native_atomic_inc(ethr_native_atomic_t *var)
+ETHR_NATMC_FUNC__(inc_mb)(ETHR_ATMC_T__ *var)
{
__asm__ __volatile__(
- "lock; inc" LONG_SUFFIX " %0"
+ "lock; inc" ETHR_AINT_SUFFIX__ " %0"
: "=m"(var->counter)
- : "m"(var->counter));
+ : "m"(var->counter)
+ : "memory");
}
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_DEC_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_DEC_MB 1
+#endif
+
static ETHR_INLINE void
-ethr_native_atomic_dec(ethr_native_atomic_t *var)
+ETHR_NATMC_FUNC__(dec_mb)(ETHR_ATMC_T__ *var)
{
__asm__ __volatile__(
- "lock; dec" LONG_SUFFIX " %0"
+ "lock; dec" ETHR_AINT_SUFFIX__ " %0"
: "=m"(var->counter)
- : "m"(var->counter));
+ : "m"(var->counter)
+ : "memory");
}
-static ETHR_INLINE long
-ethr_native_atomic_add_return(ethr_native_atomic_t *var, long incr)
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_MB 1
+#else
+# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_RETURN_MB 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(add_return_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
{
- long tmp;
+ ETHR_AINT_T__ tmp;
tmp = incr;
__asm__ __volatile__(
- "lock; xadd" LONG_SUFFIX " %0, %1" /* xadd didn't exist prior to the 486 */
+ "lock; xadd" ETHR_AINT_SUFFIX__ " %0, %1" /* xadd didn't exist prior to the 486 */
: "=r"(tmp)
- : "m"(var->counter), "0"(tmp));
+ : "m"(var->counter), "0"(tmp)
+ : "memory");
/* now tmp is the atomic's previous value */
return tmp + incr;
}
-#define ethr_native_atomic_inc_return(var) ethr_native_atomic_add_return((var), 1)
-#define ethr_native_atomic_dec_return(var) ethr_native_atomic_add_return((var), -1)
-
-static ETHR_INLINE long
-ethr_native_atomic_cmpxchg(ethr_native_atomic_t *var, long new, long old)
-{
- __asm__ __volatile__(
- "lock; cmpxchg" LONG_SUFFIX " %2, %3"
- : "=a"(old), "=m"(var->counter)
- : "r"(new), "m"(var->counter), "0"(old)
- : "cc", "memory"); /* full memory clobber to make this a compiler barrier */
- return old;
-}
-
-static ETHR_INLINE long
-ethr_native_atomic_and_retold(ethr_native_atomic_t *var, long mask)
-{
- long tmp, old;
-
- tmp = var->counter;
- do {
- old = tmp;
- tmp = ethr_native_atomic_cmpxchg(var, tmp & mask, tmp);
- } while (__builtin_expect(tmp != old, 0));
- /* now tmp is the atomic's previous value */
- return tmp;
-}
-
-static ETHR_INLINE long
-ethr_native_atomic_or_retold(ethr_native_atomic_t *var, long mask)
-{
- long tmp, old;
-
- tmp = var->counter;
- do {
- old = tmp;
- tmp = ethr_native_atomic_cmpxchg(var, tmp | mask, tmp);
- } while (__builtin_expect(tmp != old, 0));
- /* now tmp is the atomic's previous value */
- return tmp;
-}
-
-static ETHR_INLINE long
-ethr_native_atomic_xchg(ethr_native_atomic_t *var, long val)
-{
- long tmp = val;
- __asm__ __volatile__(
- "xchg" LONG_SUFFIX " %0, %1"
- : "=r"(tmp)
- : "m"(var->counter), "0"(tmp));
- /* now tmp is the atomic's previous value */
- return tmp;
-}
-
-#undef LONG_SUFFIX
-
#endif /* ETHR_TRY_INLINE_FUNCS */
-#endif /* ETHREAD_I386_ATOMIC_H */
+#undef ETHR_NATMC_FUNC__
+#undef ETHR_ATMC_T__
+#undef ETHR_AINT_T__
+#undef ETHR_AINT_SUFFIX__
+
+#endif /* ETHR_INCLUDE_ATOMIC_IMPL__ */
diff --git a/erts/include/internal/i386/ethr_dw_atomic.h b/erts/include/internal/i386/ethr_dw_atomic.h
new file mode 100644
index 0000000000..9fb89bbe43
--- /dev/null
+++ b/erts/include/internal/i386/ethr_dw_atomic.h
@@ -0,0 +1,278 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Native double word atomics for x86/x86_64
+ * Author: Rickard Green
+ */
+
+#ifndef ETHR_X86_DW_ATOMIC_H__
+#define ETHR_X86_DW_ATOMIC_H__
+
+#ifdef ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT
+
+#define ETHR_HAVE_NATIVE_DW_ATOMIC
+#define ETHR_NATIVE_DW_ATOMIC_IMPL "ethread"
+
+/*
+ * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it will be used
+ * at runtime in order to determine if native or fallback implementation
+ * should be used.
+ */
+#define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__
+
+#if ETHR_SIZEOF_PTR == 4
+typedef volatile ethr_sint64_t * ethr_native_dw_ptr_t;
+# define ETHR_DW_NATMC_ALIGN_MASK__ 0x7
+# define ETHR_DW_CMPXCHG_SFX__ "8b"
+# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint64_t
+#else
+#ifdef ETHR_HAVE_INT128_T
+# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint128_t
+typedef volatile ethr_sint128_t * ethr_native_dw_ptr_t;
+#else
+typedef struct {
+ ethr_sint64_t sint64[2];
+} ethr_native_sint128_t__;
+typedef volatile ethr_native_sint128_t__ * ethr_native_dw_ptr_t;
+#endif
+# define ETHR_DW_NATMC_ALIGN_MASK__ 0xf
+# define ETHR_DW_CMPXCHG_SFX__ "16b"
+#endif
+
+/*
+ * We need 16 byte aligned memory in 64-bit mode, and 8 byte aligned
+ * memory in 32-bit mode. 16 byte aligned malloc in 64-bit mode is
+ * not common, and at least some glibc malloc implementations
+ * only 4 byte align in 32-bit mode.
+ *
+ * This code assumes 8 byte aligned memory in 64-bit mode, and 4 byte
+ * aligned memory in 32-bit mode. A malloc implementation that does
+ * not adhere to these alignment requirements is seriously broken,
+ * and we wont bother trying to work around it.
+ *
+ * Since memory alignment may be off by one word we need to align at
+ * runtime. We, therefore, need an extra word allocated.
+ */
+#define ETHR_DW_NATMC_MEM__(VAR) \
+ (&var->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
+typedef union {
+#ifdef ETHR_NATIVE_SU_DW_SINT_T
+ volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint;
+#endif
+ volatile ethr_sint_t sint[3];
+ volatile char c[ETHR_SIZEOF_PTR*3];
+} ethr_native_dw_atomic_t;
+
+
+#if (defined(ETHR_TRY_INLINE_FUNCS) \
+ || defined(ETHR_ATOMIC_IMPL__) \
+ || defined(ETHR_X86_SSE2_ASM_C__)) \
+ && ETHR_SIZEOF_PTR == 4 \
+ && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+ethr_sint64_t
+ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var);
+void
+ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val);
+#endif
+
+#if (defined(ETHR_TRY_INLINE_FUNCS) \
+ || defined(ETHR_ATOMIC_IMPL__) \
+ || defined(ETHR_X86_SSE2_ASM_C__))
+# ifdef ETHR_DEBUG
+# define ETHR_DW_DBG_ALIGNED__(PTR) \
+ ETHR_ASSERT((((ethr_uint_t) (PTR)) & ETHR_DW_NATMC_ALIGN_MASK__) == 0);
+# else
+# define ETHR_DW_DBG_ALIGNED__(PTR)
+# endif
+#endif
+
+#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)
+
+#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR
+static ETHR_INLINE ethr_sint_t *
+ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var)
+{
+ return (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var);
+}
+
+#if ETHR_SIZEOF_PTR == 4 && defined(__PIC__) && __PIC__
+/*
+ * When position independent code is used in 32-bit mode, the EBX register
+ * is used for storage of global offset table address, and we may not
+ * use it as input or output in an asm. We need to save and restore the
+ * EBX register explicitly (for some reason gcc doesn't provide this
+ * service to us).
+ */
+# define ETHR_NO_CLOBBER_EBX__ 1
+#else
+# define ETHR_NO_CLOBBER_EBX__ 0
+#endif
+
+#if ETHR_NO_CLOBBER_EBX__ && !defined(ETHR_CMPXCHG8B_REGISTER_SHORTAGE)
+/* When no optimization is on, we'll run into a register shortage */
+# if defined(ETHR_DEBUG) || defined(DEBUG) || defined(VALGRIND) \
+ || defined(GCOV) || defined(PURIFY) || defined(PURECOV)
+# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 1
+# else
+# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 0
+# endif
+#endif
+
+
+#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_CMPXCHG_MB
+
+static ETHR_INLINE int
+ethr_native_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var,
+ ethr_sint_t *new,
+ ethr_sint_t *xchg)
+{
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ char xchgd;
+
+ ETHR_DW_DBG_ALIGNED__(p);
+
+ __asm__ __volatile__(
+#if ETHR_NO_CLOBBER_EBX__
+ "pushl %%ebx\n\t"
+# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE
+ "movl (%7), %%ebx\n\t"
+ "movl 4(%7), %%ecx\n\t"
+# else
+ "movl %8, %%ebx\n\t"
+# endif
+#endif
+ "lock; cmpxchg" ETHR_DW_CMPXCHG_SFX__ " %0\n\t"
+ "setz %3\n\t"
+#if ETHR_NO_CLOBBER_EBX__
+ "popl %%ebx\n\t"
+#endif
+ : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd)
+ : "m"(*p), "1"(xchg[1]), "2"(xchg[0]),
+#if ETHR_NO_CLOBBER_EBX__
+# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE
+ "3"(new)
+# else
+ "3"(new[1]),
+ "r"(new[0])
+# endif
+#else
+ "3"(new[1]),
+ "b"(new[0])
+#endif
+ : "cc", "memory");
+
+ return (int) xchgd;
+}
+
+#undef ETHR_NO_CLOBBER_EBX__
+
+#if ETHR_SIZEOF_PTR == 4 && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+
+typedef union {
+ ethr_sint64_t sint64;
+ ethr_sint_t sint[2];
+} ethr_dw_atomic_no_sse2_convert_t;
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ
+
+static ETHR_INLINE ethr_sint64_t
+ethr_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
+{
+ if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
+ return ethr_sse2_native_su_dw_atomic_read(var);
+ else {
+ ethr_sint_t new[2];
+ ethr_dw_atomic_no_sse2_convert_t xchg;
+ new[0] = new[1] = xchg.sint[0] = xchg.sint[1] = 0x83838383;
+ (void) ethr_native_dw_atomic_cmpxchg_mb(var, new, xchg.sint);
+ return xchg.sint64;
+ }
+}
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET
+
+static ETHR_INLINE void
+ethr_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val)
+{
+ if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
+ ethr_sse2_native_su_dw_atomic_set(var, val);
+ else {
+ ethr_sint_t xchg[2] = {0, 0};
+ ethr_dw_atomic_no_sse2_convert_t new;
+ new.sint64 = val;
+ while (!ethr_native_dw_atomic_cmpxchg_mb(var, new.sint, xchg));
+ }
+}
+
+#endif /* ETHR_SIZEOF_PTR == 4 */
+
+#endif /* ETHR_TRY_INLINE_FUNCS */
+
+#if defined(ETHR_X86_SSE2_ASM_C__) \
+ && ETHR_SIZEOF_PTR == 4 \
+ && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+
+/*
+ * 8-byte aligned loads and stores of 64-bit values are atomic from
+ * pentium and forward. An ordinary volatile load or store in 32-bit
+ * mode generates two 32-bit operations (at least with gcc-4.1.2 using
+ * -msse2). In order to guarantee one 64-bit load/store operation
+ * from/to memory we load/store via an xmm register using movq.
+ *
+ * Load/store can be achieved using cmpxchg8b, however, using movq is
+ * much faster. Unfortunately we cannot do the same thing in 64-bit
+ * mode; instead, we have to do loads and stores via cmpxchg16b.
+ *
+ * We do not inline these, but instead compile these into a separate
+ * object file using -msse2. This since we don't want to use -msse2 for
+ * the whole system. If we detect sse2 support (pentium4 and forward)
+ * at runtime, we use them; otherwise, we fall back to using cmpxchg8b
+ * for loads and stores. This way the binary can be moved between
+ * processors with and without sse2 support.
+ */
+
+ethr_sint64_t
+ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
+{
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ ethr_sint64_t val;
+ ETHR_DW_DBG_ALIGNED__(p);
+ __asm__ __volatile__("movq %1, %0\n\t" : "=x"(val) : "m"(*p) : "memory");
+ return val;
+}
+
+void
+ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val)
+{
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ ETHR_DW_DBG_ALIGNED__(p);
+ __asm__ __volatile__("movq %1, %0\n\t" : "=m"(*p) : "x"(val) : "memory");
+}
+
+#endif /* ETHR_X86_SSE2_ASM_C__ */
+
+#endif /* ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT */
+
+#endif /* ETHR_X86_DW_ATOMIC_H__ */
+
diff --git a/erts/include/internal/i386/ethr_membar.h b/erts/include/internal/i386/ethr_membar.h
new file mode 100644
index 0000000000..92d9de7f3f
--- /dev/null
+++ b/erts/include/internal/i386/ethr_membar.h
@@ -0,0 +1,114 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Memory barriers for x86/x86-64
+ * Author: Rickard Green
+ */
+
+#ifndef ETHR_X86_MEMBAR_H__
+#define ETHR_X86_MEMBAR_H__
+
+#define ETHR_LoadLoad (1 << 0)
+#define ETHR_LoadStore (1 << 1)
+#define ETHR_StoreLoad (1 << 2)
+#define ETHR_StoreStore (1 << 3)
+
+#define ETHR_NO_SSE2_MEMORY_BARRIER__ \
+do { \
+ volatile ethr_sint32_t x__ = 0; \
+ __asm__ __volatile__ ("lock; orl $0x0, %0\n\t" \
+ : "=m"(x__) \
+ : "m"(x__) \
+ : "memory"); \
+} while (0)
+
+static __inline__ void
+ethr_cfence__(void)
+{
+ __asm__ __volatile__ ("" : : : "memory");
+}
+
+static __inline__ void
+ethr_mfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ ETHR_NO_SSE2_MEMORY_BARRIER__;
+ else
+#endif
+ __asm__ __volatile__ ("mfence\n\t" : : : "memory");
+}
+
+static __inline__ void
+ethr_sfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ ETHR_NO_SSE2_MEMORY_BARRIER__;
+ else
+#endif
+ __asm__ __volatile__ ("sfence\n\t" : : : "memory");
+}
+
+static __inline__ void
+ethr_lfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ ETHR_NO_SSE2_MEMORY_BARRIER__;
+ else
+#endif
+ __asm__ __volatile__ ("lfence\n\t" : : : "memory");
+}
+
+#define ETHR_X86_OUT_OF_ORDER_MEMBAR(B) \
+ ETHR_CHOOSE_EXPR((B) == ETHR_StoreStore, \
+ ethr_sfence__(), \
+ ETHR_CHOOSE_EXPR((B) == ETHR_LoadLoad, \
+ ethr_lfence__(), \
+ ethr_mfence__()))
+
+#ifdef ETHR_X86_OUT_OF_ORDER
+
+#define ETHR_MEMBAR(B) \
+ ETHR_X86_OUT_OF_ORDER_MEMBAR((B))
+
+#else /* !ETHR_X86_OUT_OF_ORDER (the default) */
+
+/*
+ * We assume that only stores before loads may be reordered. That is,
+ * we assume that *no* instructions like these are used:
+ * - CLFLUSH,
+ * - streaming stores executed with non-temporal move,
+ * - string operations, or
+ * - other instructions which aren't LoadLoad, LoadStore, and StoreStore
+ * ordered by themselves
+ * If such instructions are used, either insert memory barriers
+ * using ETHR_X86_OUT_OF_ORDER_MEMBAR() at appropriate places, or
+ * define ETHR_X86_OUT_OF_ORDER. For more info see Intel 64 and IA-32
+ * Architectures Software Developer's Manual; Vol 3A; Chapter 8.2.2.
+ */
+
+#define ETHR_MEMBAR(B) \
+ ETHR_CHOOSE_EXPR((B) & ETHR_StoreLoad, ethr_mfence__(), ethr_cfence__())
+
+#endif /* !ETHR_X86_OUT_OF_ORDER */
+
+#endif /* ETHR_X86_MEMBAR_H__ */
diff --git a/erts/include/internal/i386/ethread.h b/erts/include/internal/i386/ethread.h
index fad8b108fa..80e4dc7b99 100644
--- a/erts/include/internal/i386/ethread.h
+++ b/erts/include/internal/i386/ethread.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -24,11 +24,15 @@
#ifndef ETHREAD_I386_ETHREAD_H
#define ETHREAD_I386_ETHREAD_H
+#include "ethr_membar.h"
+#define ETHR_ATOMIC_WANT_32BIT_IMPL__
#include "atomic.h"
+#if ETHR_SIZEOF_PTR == 8
+# define ETHR_ATOMIC_WANT_64BIT_IMPL__
+# include "atomic.h"
+#endif
+#include "ethr_dw_atomic.h"
#include "spinlock.h"
#include "rwlock.h"
-#define ETHR_HAVE_NATIVE_ATOMICS 1
-#define ETHR_HAVE_NATIVE_LOCKS 1
-
#endif /* ETHREAD_I386_ETHREAD_H */
diff --git a/erts/include/internal/i386/rwlock.h b/erts/include/internal/i386/rwlock.h
index c009be8ef1..1a8cd7da0c 100644
--- a/erts/include/internal/i386/rwlock.h
+++ b/erts/include/internal/i386/rwlock.h
@@ -1,19 +1,19 @@
/*
* %CopyrightBegin%
- *
- * Copyright Ericsson AB 2005-2009. All Rights Reserved.
- *
+ *
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
+ *
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
* compliance with the License. You should have received a copy of the
* Erlang Public License along with this software. If not, it can be
* retrieved online at http://www.erlang.org/.
- *
+ *
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
- *
+ *
* %CopyrightEnd%
*/
@@ -26,12 +26,15 @@
#ifndef ETHREAD_I386_RWLOCK_H
#define ETHREAD_I386_RWLOCK_H
+#define ETHR_HAVE_NATIVE_RWSPINLOCKS 1
+#define ETHR_NATIVE_RWSPINLOCK_IMPL "ethread"
+
/* XXX: describe the algorithm */
typedef struct {
volatile int lock;
} ethr_native_rwlock_t;
-#ifdef ETHR_TRY_INLINE_FUNCS
+#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_AUX_IMPL__)
#define ETHR_RWLOCK_OFFSET (1<<24)
diff --git a/erts/include/internal/i386/spinlock.h b/erts/include/internal/i386/spinlock.h
index 2b4832e26a..a84fba91b1 100644
--- a/erts/include/internal/i386/spinlock.h
+++ b/erts/include/internal/i386/spinlock.h
@@ -1,19 +1,19 @@
/*
* %CopyrightBegin%
- *
- * Copyright Ericsson AB 2005-2009. All Rights Reserved.
- *
+ *
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
+ *
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
* compliance with the License. You should have received a copy of the
* Erlang Public License along with this software. If not, it can be
* retrieved online at http://www.erlang.org/.
- *
+ *
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
- *
+ *
* %CopyrightEnd%
*/
@@ -24,6 +24,9 @@
#ifndef ETHREAD_I386_SPINLOCK_H
#define ETHREAD_I386_SPINLOCK_H
+#define ETHR_HAVE_NATIVE_SPINLOCKS 1
+#define ETHR_NATIVE_SPINLOCK_IMPL "ethread"
+
/* A spinlock is the low byte of an aligned 32-bit integer.
* A non-zero value means that the lock is locked.
*/
@@ -31,7 +34,7 @@ typedef struct {
volatile unsigned int lock;
} ethr_native_spinlock_t;
-#ifdef ETHR_TRY_INLINE_FUNCS
+#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_AUX_IMPL__)
static ETHR_INLINE void
ethr_native_spinlock_init(ethr_native_spinlock_t *lock)
@@ -46,16 +49,20 @@ ethr_native_spin_unlock(ethr_native_spinlock_t *lock)
* On i386 this needs to be a locked operation
* to avoid Pentium Pro errata 66 and 92.
*/
-#if defined(__x86_64__)
- __asm__ __volatile__("" : : : "memory");
- *(unsigned char*)&lock->lock = 0;
-#else
- char tmp = 0;
- __asm__ __volatile__(
- "xchgb %b0, %1"
- : "=q"(tmp), "=m"(lock->lock)
- : "0"(tmp) : "memory");
+#if !defined(__x86_64__)
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__) {
+ char tmp = 0;
+ __asm__ __volatile__(
+ "xchgb %b0, %1"
+ : "=q"(tmp), "=m"(lock->lock)
+ : "0"(tmp) : "memory");
+ }
+ else
#endif
+ {
+ ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);
+ *(unsigned char*)&lock->lock = 0;
+ }
}
static ETHR_INLINE int