author    Rickard Green <[email protected]>  2011-07-08 13:56:10 +0200
committer Rickard Green <[email protected]>  2011-07-08 13:56:10 +0200
commit    c6bc815813f57fb7dfffe704c31a4124a0fe755e (patch)
tree      6d142c893ee4e9f800b094bba26537147cf5b2de /erts/include/internal/i386/ethr_dw_atomic.h
parent    5c62fdc1914000f3da921c82a82b6dc30783db53 (diff)
parent    0204e80cba378dfc1140a7f98d96705d470bddde (diff)
Merge branch 'rickard/atomics-api/OTP-9014' into major
* rickard/atomics-api/OTP-9014:
  Use new atomic API in runtime system
  Improve ethread atomics
Diffstat (limited to 'erts/include/internal/i386/ethr_dw_atomic.h')
-rw-r--r-- erts/include/internal/i386/ethr_dw_atomic.h | 278
1 file changed, 278 insertions, 0 deletions
diff --git a/erts/include/internal/i386/ethr_dw_atomic.h b/erts/include/internal/i386/ethr_dw_atomic.h
new file mode 100644
index 0000000000..9fb89bbe43
--- /dev/null
+++ b/erts/include/internal/i386/ethr_dw_atomic.h
@@ -0,0 +1,278 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Native double word atomics for x86/x86_64
+ * Author: Rickard Green
+ */
+
+#ifndef ETHR_X86_DW_ATOMIC_H__
+#define ETHR_X86_DW_ATOMIC_H__
+
+#ifdef ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT
+
+#define ETHR_HAVE_NATIVE_DW_ATOMIC
+#define ETHR_NATIVE_DW_ATOMIC_IMPL "ethread"
+
+/*
+ * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it is evaluated
+ * at runtime in order to determine whether the native or the fallback
+ * implementation should be used.
+ */
+#define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__
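+/*
+ * Usage sketch (the actual dispatch lives in the generic ethread code,
+ * not in this header): the macro above is expected to be tested roughly
+ * like
+ *
+ *     if (ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__)
+ *         ... use the native implementation in this file ...
+ *     else
+ *         ... use the fallback implementation ...
+ */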
+
+#if ETHR_SIZEOF_PTR == 4
+typedef volatile ethr_sint64_t * ethr_native_dw_ptr_t;
+# define ETHR_DW_NATMC_ALIGN_MASK__ 0x7
+# define ETHR_DW_CMPXCHG_SFX__ "8b"
+# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint64_t
+#else
+#ifdef ETHR_HAVE_INT128_T
+# define ETHR_NATIVE_SU_DW_SINT_T ethr_sint128_t
+typedef volatile ethr_sint128_t * ethr_native_dw_ptr_t;
+#else
+typedef struct {
+ ethr_sint64_t sint64[2];
+} ethr_native_sint128_t__;
+typedef volatile ethr_native_sint128_t__ * ethr_native_dw_ptr_t;
+#endif
+# define ETHR_DW_NATMC_ALIGN_MASK__ 0xf
+# define ETHR_DW_CMPXCHG_SFX__ "16b"
+#endif
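+
+/*
+ * With the definitions above, the locked instruction used by
+ * ethr_native_dw_atomic_cmpxchg_mb() below expands to
+ *
+ *     lock; cmpxchg8b   (32-bit mode:  8 byte operand,  8 byte aligned)
+ *     lock; cmpxchg16b  (64-bit mode: 16 byte operand, 16 byte aligned)
+ */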
+
+/*
+ * We need 16 byte aligned memory in 64-bit mode, and 8 byte aligned
+ * memory in 32-bit mode. A 16 byte aligned malloc in 64-bit mode is
+ * not common, and at least some glibc malloc implementations align
+ * only to 4 bytes in 32-bit mode.
+ *
+ * This code assumes 8 byte aligned memory in 64-bit mode, and 4 byte
+ * aligned memory in 32-bit mode. A malloc implementation that does
+ * not adhere to these alignment requirements is seriously broken,
+ * and we won't bother trying to work around it.
+ *
+ * Since the memory returned may thus be misaligned by exactly one
+ * word, we align at runtime, and therefore allocate one extra word.
+ */
+#define ETHR_DW_NATMC_MEM__(VAR) \
+    (&(VAR)->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
+typedef union {
+#ifdef ETHR_NATIVE_SU_DW_SINT_T
+ volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint;
+#endif
+ volatile ethr_sint_t sint[3];
+ volatile char c[ETHR_SIZEOF_PTR*3];
+} ethr_native_dw_atomic_t;
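+
+/*
+ * Example of the runtime alignment in 32-bit mode, where malloc() is
+ * assumed to 4 byte align: the base address of c[] is either already
+ * 8 byte aligned, or misaligned by exactly 4 bytes, and in the latter
+ * case (base & 0x7) skips forward to the next 8 byte boundary:
+ *
+ *     base = 0x1008: 0x1008 & 0x7 = 0 -> &c[0] = 0x1008 (aligned)
+ *     base = 0x100c: 0x100c & 0x7 = 4 -> &c[4] = 0x1010 (aligned)
+ *
+ * The extra word in the union guarantees that the aligned double word
+ * always fits inside the allocation.
+ */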
+
+
+#if (defined(ETHR_TRY_INLINE_FUNCS) \
+ || defined(ETHR_ATOMIC_IMPL__) \
+ || defined(ETHR_X86_SSE2_ASM_C__)) \
+ && ETHR_SIZEOF_PTR == 4 \
+ && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+ethr_sint64_t
+ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var);
+void
+ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val);
+#endif
+
+#if (defined(ETHR_TRY_INLINE_FUNCS) \
+ || defined(ETHR_ATOMIC_IMPL__) \
+ || defined(ETHR_X86_SSE2_ASM_C__))
+# ifdef ETHR_DEBUG
+# define ETHR_DW_DBG_ALIGNED__(PTR) \
+ ETHR_ASSERT((((ethr_uint_t) (PTR)) & ETHR_DW_NATMC_ALIGN_MASK__) == 0);
+# else
+# define ETHR_DW_DBG_ALIGNED__(PTR)
+# endif
+#endif
+
+#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)
+
+#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR
+static ETHR_INLINE ethr_sint_t *
+ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var)
+{
+ return (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var);
+}
+
+#if ETHR_SIZEOF_PTR == 4 && defined(__PIC__) && __PIC__
+/*
+ * When position independent code is used in 32-bit mode, the EBX register
+ * is used to hold the address of the global offset table, and we must not
+ * use it as input or output in an asm. We need to save and restore the
+ * EBX register explicitly ourselves (for some reason gcc doesn't provide
+ * this service to us).
+ */
+# define ETHR_NO_CLOBBER_EBX__ 1
+#else
+# define ETHR_NO_CLOBBER_EBX__ 0
+#endif
+
+#if ETHR_NO_CLOBBER_EBX__ && !defined(ETHR_CMPXCHG8B_REGISTER_SHORTAGE)
+/* When compiling without optimization we run into a register shortage */
+# if defined(ETHR_DEBUG) || defined(DEBUG) || defined(VALGRIND) \
+ || defined(GCOV) || defined(PURIFY) || defined(PURECOV)
+# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 1
+# else
+# define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 0
+# endif
+#endif
+
+
+#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_CMPXCHG_MB
+
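+/*
+ * Compare and exchange of a double word with a full memory barrier.
+ * The expected value is passed in, and (on failure) the actual memory
+ * contents are returned, via xchg; cmpxchg8b/16b itself expects the
+ * old value in {e,r}dx:{e,r}ax and the new value in {e,r}cx:{e,r}bx,
+ * which is what the constraints below arrange. Returns non-zero if
+ * the exchange took place (the setz instruction).
+ */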
+static ETHR_INLINE int
+ethr_native_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var,
+ ethr_sint_t *new,
+ ethr_sint_t *xchg)
+{
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ char xchgd;
+
+ ETHR_DW_DBG_ALIGNED__(p);
+
+ __asm__ __volatile__(
+#if ETHR_NO_CLOBBER_EBX__
+ "pushl %%ebx\n\t"
+# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE
+ "movl (%7), %%ebx\n\t"
+ "movl 4(%7), %%ecx\n\t"
+# else
+ "movl %8, %%ebx\n\t"
+# endif
+#endif
+ "lock; cmpxchg" ETHR_DW_CMPXCHG_SFX__ " %0\n\t"
+ "setz %3\n\t"
+#if ETHR_NO_CLOBBER_EBX__
+ "popl %%ebx\n\t"
+#endif
+ : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd)
+ : "m"(*p), "1"(xchg[1]), "2"(xchg[0]),
+#if ETHR_NO_CLOBBER_EBX__
+# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE
+ "3"(new)
+# else
+ "3"(new[1]),
+ "r"(new[0])
+# endif
+#else
+ "3"(new[1]),
+ "b"(new[0])
+#endif
+ : "cc", "memory");
+
+ return (int) xchgd;
+}
+
+#undef ETHR_NO_CLOBBER_EBX__
+
+#if ETHR_SIZEOF_PTR == 4 && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+
+typedef union {
+ ethr_sint64_t sint64;
+ ethr_sint_t sint[2];
+} ethr_dw_atomic_no_sse2_convert_t;
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ
+
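+/*
+ * Without sse2 a 64-bit read is implemented via the cmpxchg above: the
+ * improbable value 0x83838383 is passed as both the expected and the
+ * new value. If the comparison fails (the common case) the current
+ * memory contents are returned in xchg; if it happens to succeed, the
+ * very same value is written back, so memory is unchanged either way.
+ */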
+static ETHR_INLINE ethr_sint64_t
+ethr_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
+{
+ if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
+ return ethr_sse2_native_su_dw_atomic_read(var);
+ else {
+ ethr_sint_t new[2];
+ ethr_dw_atomic_no_sse2_convert_t xchg;
+ new[0] = new[1] = xchg.sint[0] = xchg.sint[1] = 0x83838383;
+ (void) ethr_native_dw_atomic_cmpxchg_mb(var, new, xchg.sint);
+ return xchg.sint64;
+ }
+}
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET
+
+static ETHR_INLINE void
+ethr_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val)
+{
+ if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
+ ethr_sse2_native_su_dw_atomic_set(var, val);
+ else {
+ ethr_sint_t xchg[2] = {0, 0};
+ ethr_dw_atomic_no_sse2_convert_t new;
+ new.sint64 = val;
+ while (!ethr_native_dw_atomic_cmpxchg_mb(var, new.sint, xchg));
+ }
+}
+
+#endif /* ETHR_SIZEOF_PTR == 4 */
+
+#endif /* ETHR_TRY_INLINE_FUNCS */
+
+#if defined(ETHR_X86_SSE2_ASM_C__) \
+ && ETHR_SIZEOF_PTR == 4 \
+ && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
+
+/*
+ * 8 byte aligned loads and stores of 64-bit values are atomic from
+ * the Pentium and onward. An ordinary volatile load or store in
+ * 32-bit mode is, however, generated as two 32-bit operations (at
+ * least by gcc-4.1.2 when using -msse2). In order to guarantee a
+ * single 64-bit load/store operation from/to memory, we load/store
+ * via an xmm register using movq.
+ *
+ * Load/store could also be implemented using cmpxchg8b, but using
+ * movq is much faster. Unfortunately we cannot do the same thing in
+ * 64-bit mode; there we have to do loads and stores via cmpxchg16b.
+ *
+ * We do not inline these functions; instead, we compile them into a
+ * separate object file using -msse2, since we don't want to compile
+ * the whole system with -msse2. If we detect sse2 support (pentium4
+ * and onward) at runtime we use them; otherwise, we fall back on
+ * cmpxchg8b for loads and stores. This way the binary can be moved
+ * between processors with and without sse2 support.
+ */
+
+ethr_sint64_t
+ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
+{
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ ethr_sint64_t val;
+ ETHR_DW_DBG_ALIGNED__(p);
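+    /* The "x" constraint places val in an xmm register; movq then
+     * performs the full 64-bit load in a single memory operation.
+     * The store in ethr_sse2_native_su_dw_atomic_set() below works
+     * the same way in the other direction. */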
+ __asm__ __volatile__("movq %1, %0\n\t" : "=x"(val) : "m"(*p) : "memory");
+ return val;
+}
+
+void
+ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+ ethr_sint64_t val)
+{
+ ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+ ETHR_DW_DBG_ALIGNED__(p);
+ __asm__ __volatile__("movq %1, %0\n\t" : "=m"(*p) : "x"(val) : "memory");
+}
+
+#endif /* ETHR_X86_SSE2_ASM_C__ */
+
+#endif /* ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT */
+
+#endif /* ETHR_X86_DW_ATOMIC_H__ */
+