Improve ethread atomics based on GCC builtins

* Use of __atomic builtins when available. * Improved configure test that checks for missing memory barrier in __sync_synchronize(). The old approach was to verify known working gcc versions and check gcc version at compile time. Besides not being very safe, the old approach often unnecessarily caused usage of the very expensive workaround. * Introduced (no overhead) workaround for missing clobber in __sync_synchronize() when using buggy LLVM implementation of __sync_synchronize(). * Implement native memory barriers for ARM processors supporting the DMB instruction. * Use of volatile store on Alpha as atomic set operation if no __atomic_store_n() is available (already used on x86/x86_64 Sparc V9, PowerPC, and MIPS). Fallback used when not using volatile store is typically very expensive. * Use volatile load on Alpha and ARM as atomic read operation if no __atomic_load_n() is available (already used on x86/x86_64 Sparc V9, PowerPC, and MIPS). Fallback when not using volatile load is typically very expensive.
author: Rickard Green <[email protected]> 2015-01-05 11:04:34 +0100
committer: Rickard Green <[email protected]> 2015-01-14 20:24:45 +0100
commit: 24fa075b5c0d54f2035a2ff510a82aa19187eda4 (patch)
tree: 2e26371bbcf360ae53f75c6bad1799aa375c1b72 /erts/include
parent: ce73c38b10d1dee5209b505ef054b108e747b522 (diff)
download: otp-24fa075b5c0d54f2035a2ff510a82aa19187eda4.tar.gz
otp-24fa075b5c0d54f2035a2ff510a82aa19187eda4.tar.bz2
otp-24fa075b5c0d54f2035a2ff510a82aa19187eda4.zip
7 files changed, 1140 insertions, 148 deletions
diff --git a/erts/include/internal/ethread.h b/erts/include/internal/ethread.h
index ad5d05704c..ac40163a1a 100644
--- a/erts/include/internal/ethread.h
+++ b/erts/include/internal/ethread.h
@@ -1,7 +1,7 @@
 /*
  * %CopyrightBegin%
  *
- * Copyright Ericsson AB 2004-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2004-2015. All Rights Reserved.
  *
  * The contents of this file are subject to the Erlang Public License,
  * Version 1.1, (the "License"); you may not use this file except in
@@ -364,6 +364,9 @@ extern ethr_runtime_t ethr_runtime__;
 #          include "sparc64/ethread.h"
 #        endif
 #      endif
+#      if ETHR_HAVE_GCC___ATOMIC_BUILTINS
+#        include "gcc/ethread.h"
+#      endif
 #      include "libatomic_ops/ethread.h"
 #      include "gcc/ethread.h"
 #    endif
diff --git a/erts/include/internal/ethread_header_config.h.in b/erts/include/internal/ethread_header_config.h.in
index b36322490a..bd57d9d67b 100644
--- a/erts/include/internal/ethread_header_config.h.in
+++ b/erts/include/internal/ethread_header_config.h.in
@@ -1,7 +1,7 @@
 /*
  * %CopyrightBegin%
  *
- * Copyright Ericsson AB 2004-2011. All Rights Reserved.
+ * Copyright Ericsson AB 2004-2015. All Rights Reserved.
  *
  * The contents of this file are subject to the Erlang Public License,
  * Version 1.1, (the "License"); you may not use this file except in
@@ -84,32 +84,62 @@
 /* Define if run in Sparc RMO, PSO, or TSO mode */
 #undef ETHR_SPARC_RMO
 
-/* Define if you have __sync_add_and_fetch() for 32-bit integers */
-#undef ETHR_HAVE___SYNC_ADD_AND_FETCH32
+/* Define as a boolean indicating whether you have a gcc compatible compiler
+   capable of generating the ARM DMB instruction, and are compiling for an ARM
+   processor with ARM DMB instruction support, or not */
+#undef ETHR_HAVE_GCC_ASM_ARM_DMB_INSTRUCTION
 
-/* Define if you have __sync_add_and_fetch() for 64-bit integers */
-#undef ETHR_HAVE___SYNC_ADD_AND_FETCH64
+/* Define as a bitmask corresponding to the word sizes that
+   __sync_synchronize() can handle on your system */
+#undef ETHR_HAVE___sync_synchronize
 
-/* Define if you have __sync_fetch_and_and() for 32-bit integers */
-#undef ETHR_HAVE___SYNC_FETCH_AND_AND32
+/* Define as a bitmask corresponding to the word sizes that
+   __sync_add_and_fetch() can handle on your system */
+#undef ETHR_HAVE___sync_add_and_fetch
 
-/* Define if you have __sync_fetch_and_and() for 64-bit integers */
-#undef ETHR_HAVE___SYNC_FETCH_AND_AND64
+/* Define as a bitmask corresponding to the word sizes that
+   __sync_fetch_and_and() can handle on your system */
+#undef ETHR_HAVE___sync_fetch_and_and
 
-/* Define if you have __sync_fetch_and_or() for 32-bit integers */
-#undef ETHR_HAVE___SYNC_FETCH_AND_OR32
+/* Define as a bitmask corresponding to the word sizes that
+   __sync_fetch_and_or() can handle on your system */
+#undef ETHR_HAVE___sync_fetch_and_or
 
-/* Define if you have __sync_fetch_and_or() for 64-bit integers */
-#undef ETHR_HAVE___SYNC_FETCH_AND_OR64
+/* Define as a bitmask corresponding to the word sizes that
+   __sync_val_compare_and_swap() can handle on your system */
+#undef ETHR_HAVE___sync_val_compare_and_swap
 
-/* Define if you have __sync_val_compare_and_swap() for 32-bit integers */
-#undef ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP32
+/* Define as a boolean indicating whether you have a gcc __atomic builtins or
+   not */
+#undef ETHR_HAVE_GCC___ATOMIC_BUILTINS
 
-/* Define if you have __sync_val_compare_and_swap() for 64-bit integers */
-#undef ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64
+/* Define as a boolean indicating whether you trust gcc's __atomic_* builtins
+   memory barrier implementations, or not */
+#undef ETHR_TRUST_GCC_ATOMIC_BUILTINS_MEMORY_BARRIERS
 
-/* Define if you have __sync_val_compare_and_swap() for 128-bit integers */
-#undef ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP128
+/* Define as a bitmask corresponding to the word sizes that __atomic_store_n()
+   can handle on your system */
+#undef ETHR_HAVE___atomic_store_n
+
+/* Define as a bitmask corresponding to the word sizes that __atomic_load_n()
+   can handle on your system */
+#undef ETHR_HAVE___atomic_load_n
+
+/* Define as a bitmask corresponding to the word sizes that
+   __atomic_add_fetch() can handle on your system */
+#undef ETHR_HAVE___atomic_add_fetch
+
+/* Define as a bitmask corresponding to the word sizes that
+   __atomic_fetch_and() can handle on your system */
+#undef ETHR_HAVE___atomic_fetch_and
+
+/* Define as a bitmask corresponding to the word sizes that
+   __atomic_fetch_or() can handle on your system */
+#undef ETHR_HAVE___atomic_fetch_or
+
+/* Define as a bitmask corresponding to the word sizes that
+   __atomic_compare_exchange_n() can handle on your system */
+#undef ETHR_HAVE___atomic_compare_exchange_n
 
 /* Define if you prefer gcc native ethread implementations */
 #undef ETHR_PREFER_GCC_NATIVE_IMPLS
diff --git a/erts/include/internal/ethread_inline.h b/erts/include/internal/ethread_inline.h
index ffb756c84f..c09a67619a 100644
--- a/erts/include/internal/ethread_inline.h
+++ b/erts/include/internal/ethread_inline.h
@@ -20,6 +20,29 @@
 #ifndef ETHREAD_INLINE_H__
 #define ETHREAD_INLINE_H__
 
+#define ETHR_GCC_COMPILER_FALSE 0 /* Not a gcc compatible compiler */
+#define ETHR_GCC_COMPILER_TRUE 1 /* The GNU gcc compiler */
+/* Negative integers for gcc compatible compilers */
+#define ETHR_GCC_COMPILER_CLANG -1 /* The Clang gcc compatible compiler */
+#define ETHR_GCC_COMPILER_ICC -2 /* The Intel gcc compatible compiler */
+/* Line them up... */
+
+/*
+ * Unfortunately there is no easy and certain way of
+ * detecting a true gcc compiler, since the compatible
+ * ones all define the same defines as the true gnu-gcc...
+ */
+#if !defined(__GNUC__) && !defined(__GNUG__) 
+#  define ETHR_GCC_COMPILER ETHR_GCC_COMPILER_FALSE
+#elif defined(__clang__)
+#  define ETHR_GCC_COMPILER ETHR_GCC_COMPILER_CLANG
+#elif defined(__ICC) || defined(__INTEL_COMPILER)
+#  define ETHR_GCC_COMPILER ETHR_GCC_COMPILER_ICC
+#else
+/* Seems to be the true gnu-gcc... */
+#  define ETHR_GCC_COMPILER ETHR_GCC_COMPILER_TRUE
+#endif
+
 #if !defined(__GNUC__)
 #  define ETHR_AT_LEAST_GCC_VSN__(MAJ, MIN, PL) 0
 #elif !defined(__GNUC_MINOR__)
diff --git a/erts/include/internal/gcc/ethr_atomic.h b/erts/include/internal/gcc/ethr_atomic.h
index f598f8537b..62eed78f76 100644
--- a/erts/include/internal/gcc/ethr_atomic.h
+++ b/erts/include/internal/gcc/ethr_atomic.h
@@ -1,7 +1,7 @@
 /*
  * %CopyrightBegin%
  *
- * Copyright Ericsson AB 2010-2011. All Rights Reserved.
+ * Copyright Ericsson AB 2010-2015. All Rights Reserved.
  *
  * The contents of this file are subject to the Erlang Public License,
  * Version 1.1, (the "License"); you may not use this file except in
@@ -18,78 +18,81 @@
  */
 
 /*
- * Description: Native atomics ethread support using gcc's builtins
+ * Description: Native atomics ethread support using gcc's __atomic
+ *              and __sync builtins
  * Author: Rickard Green
+ *
+ * Note: The C11 memory model implemented by gcc's __atomic
+ *       builtins does not match the ethread API very well.
+ *
+ *       Due to this we cannot use the __ATOMIC_SEQ_CST
+ *       memory model. For more information see the comment
+ *       in the begining of ethr_membar.h in this directory.
  */
 
 #undef ETHR_INCLUDE_ATOMIC_IMPL__
-#if !defined(ETHR_GCC_ATOMIC32_H__) && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__)
-#define ETHR_GCC_ATOMIC32_H__
-#if defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP32)
-#  define ETHR_INCLUDE_ATOMIC_IMPL__ 4
-#endif
+#if !defined(ETHR_GCC_ATOMIC_ATOMIC32_H__)		\
+    && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__)		\
+    && ((ETHR_HAVE___sync_val_compare_and_swap & 4)	\
+	|| (ETHR_HAVE___atomic_compare_exchange_n & 4))
+
+#define ETHR_GCC_ATOMIC_ATOMIC32_H__
+#define ETHR_INCLUDE_ATOMIC_IMPL__ 4
 #undef ETHR_ATOMIC_WANT_32BIT_IMPL__
-#elif !defined(ETHR_GCC_ATOMIC64_H__) && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__)
-#define ETHR_GCC_ATOMIC64_H__
-#if defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64)
-#  define ETHR_INCLUDE_ATOMIC_IMPL__ 8
-#endif
-#undef ETHR_ATOMIC_WANT_64BIT_IMPL__
-#endif
 
-#ifdef ETHR_INCLUDE_ATOMIC_IMPL__
+#elif !defined(ETHR_GCC_ATOMIC64_H__)			\
+    && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__)		\
+    && ((ETHR_HAVE___sync_val_compare_and_swap & 8)	\
+	|| (ETHR_HAVE___atomic_compare_exchange_n & 8))
 
-#ifndef ETHR_GCC_ATOMIC_COMMON__
-#define ETHR_GCC_ATOMIC_COMMON__
+#define ETHR_GCC_ATOMIC64_H__
+#define ETHR_INCLUDE_ATOMIC_IMPL__ 8
+#undef ETHR_ATOMIC_WANT_64BIT_IMPL__
 
-#define ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ 0
-#if defined(__i386__) || defined(__x86_64__) || defined(__sparc__) \
-    || defined(__powerpc__) || defined(__ppc__) || defined(__mips__)
-#  undef ETHR_READ_AND_SET_WITHOUT_SYNC_OP__
-#  define ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ 1
 #endif
 
-#endif /* ETHR_GCC_ATOMIC_COMMON__ */
+#ifdef ETHR_INCLUDE_ATOMIC_IMPL__
 
 #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
 #define ETHR_HAVE_NATIVE_ATOMIC32 1
-#define ETHR_NATIVE_ATOMIC32_IMPL "gcc"
 #define ETHR_NATMC_FUNC__(X) ethr_native_atomic32_ ## X
 #define ETHR_ATMC_T__ ethr_native_atomic32_t
 #define ETHR_AINT_T__ ethr_sint32_t
-#if defined(ETHR_HAVE___SYNC_ADD_AND_FETCH32)
-#  define ETHR_HAVE___SYNC_ADD_AND_FETCH
-#endif
-#if defined(ETHR_HAVE___SYNC_FETCH_AND_AND32)
-#  define ETHR_HAVE___SYNC_FETCH_AND_AND
-#endif 
-#if defined(ETHR_HAVE___SYNC_FETCH_AND_OR32)
-#  define ETHR_HAVE___SYNC_FETCH_AND_OR
+#if ((ETHR_HAVE___sync_val_compare_and_swap & 4) \
+     && (ETHR_HAVE___atomic_compare_exchange_n & 4))
+#  define ETHR_NATIVE_ATOMIC32_IMPL "gcc_atomic_and_sync_builtins"
+#elif (ETHR_HAVE___atomic_compare_exchange_n & 4)
+#  define ETHR_NATIVE_ATOMIC32_IMPL "gcc_atomic_builtins"
+#elif (ETHR_HAVE___sync_val_compare_and_swap & 4)
+#  define ETHR_NATIVE_ATOMIC32_IMPL "gcc_sync_builtins"
+#else
+#  error "!?"
 #endif
 #elif ETHR_INCLUDE_ATOMIC_IMPL__ == 8
 #define ETHR_HAVE_NATIVE_ATOMIC64 1
-#define ETHR_NATIVE_ATOMIC64_IMPL "gcc"
 #define ETHR_NATMC_FUNC__(X) ethr_native_atomic64_ ## X
 #define ETHR_ATMC_T__ ethr_native_atomic64_t
 #define ETHR_AINT_T__ ethr_sint64_t
-#if defined(ETHR_HAVE___SYNC_ADD_AND_FETCH64)
-#  define ETHR_HAVE___SYNC_ADD_AND_FETCH
-#endif
-#if defined(ETHR_HAVE___SYNC_FETCH_AND_AND64)
-#  define ETHR_HAVE___SYNC_FETCH_AND_AND
-#endif 
-#if defined(ETHR_HAVE___SYNC_FETCH_AND_OR64)
-#  define ETHR_HAVE___SYNC_FETCH_AND_OR
+#if ((ETHR_HAVE___sync_val_compare_and_swap & 8) \
+     && (ETHR_HAVE___atomic_compare_exchange_n & 8))
+#  define ETHR_NATIVE_ATOMIC64_IMPL "gcc_atomic_and_sync_builtins"
+#elif (ETHR_HAVE___atomic_compare_exchange_n & 8)
+#  define ETHR_NATIVE_ATOMIC64_IMPL "gcc_atomic_builtins"
+#elif (ETHR_HAVE___sync_val_compare_and_swap & 8)
+#  define ETHR_NATIVE_ATOMIC64_IMPL "gcc_sync_builtins"
+#else
+#  error "!?"
 #endif
 #else
 #error "Unsupported integer size"
 #endif
 
+#undef ETHR_NATIVE_ATOMIC_IMPL__
+
 typedef struct {
-    volatile ETHR_AINT_T__ counter;
+    volatile ETHR_AINT_T__ value;
 } ETHR_ATMC_T__;
 
-
 #if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)
 
 #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
@@ -98,13 +101,19 @@ typedef struct {
 #  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADDR 1
 #endif
 
+
 static ETHR_INLINE ETHR_AINT_T__ *
 ETHR_NATMC_FUNC__(addr)(ETHR_ATMC_T__ *var)
 {
-    return (ETHR_AINT_T__ *) &var->counter;
+    return (ETHR_AINT_T__ *) &var->value;
 }
 
-#if ETHR_READ_AND_SET_WITHOUT_SYNC_OP__
+/*
+ * set()
+ */
+#if (ETHR_HAVE___atomic_store_n & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if (ETHR_GCC_RELAXED_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
 
 #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
 #  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET 1
@@ -115,12 +124,109 @@ ETHR_NATMC_FUNC__(addr)(ETHR_ATMC_T__ *var)
 static ETHR_INLINE void
 ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ value)
 {
-    var->counter = value;
+    __atomic_store_n(&var->value, value, __ATOMIC_RELAXED);
 }
 
-#endif /* ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ */
+#endif /* ETHR_GCC_RELAXED_VERSIONS__ */
 
-#if ETHR_READ_AND_SET_WITHOUT_SYNC_OP__
+#if (ETHR_GCC_RELB_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_RELB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_RELB 1
+#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(set_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ value)
+{
+    __atomic_store_n(&var->value, value, __ATOMIC_RELEASE);
+}
+
+#endif /* ETHR_GCC_RELB_VERSIONS__ */
+
+#elif (ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if (ETHR_GCC_RELAXED_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET 1
+#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ value)
+{
+    var->value = value;
+}
+
+#endif /* ETHR_GCC_RELAXED_VERSIONS__ */
+
+#if (ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if (ETHR_GCC_RELB_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_RELB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_RELB 1
+#endif
+
+static ETHR_INLINE void
+ETHR_NATMC_FUNC__(set_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ value)
+{
+    var->value = value;
+}
+
+#endif /* ETHR_GCC_RELB_VERSIONS__ */
+
+#endif /* ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ */
+
+#endif /* ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ */
+
+/*
+ * read()
+ */
+
+#if (ETHR_HAVE___atomic_load_n & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if (ETHR_GCC_RELAXED_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_READ 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var)
+{
+    return __atomic_load_n(&var->value, __ATOMIC_RELAXED);
+}
+
+#endif /* ETHR_GCC_RELAXED_VERSIONS__ */
+
+#if ((ETHR_GCC_ACQB_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) \
+     & ~ETHR___atomic_load_ACQUIRE_barrier_bug)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ_ACQB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_READ_ACQB 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(read_acqb)(ETHR_ATMC_T__ *var)
+{
+    return __atomic_load_n(&var->value, __ATOMIC_ACQUIRE);
+}
+
+#endif /* ETHR_GCC_ACQB_VERSIONS__ */
+
+#elif (ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if (ETHR_GCC_RELAXED_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
 
 #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
 #  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ 1
@@ -131,12 +237,90 @@ ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ value)
 static ETHR_INLINE ETHR_AINT_T__
 ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var)
 {
-    return var->counter;
+    return var->value;
+}
+
+#endif /* ETHR_GCC_RELAXED_VERSIONS__ */
+
+#if (ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if (ETHR_GCC_ACQB_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ_ACQB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_READ_ACQB 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(read_acqb)(ETHR_ATMC_T__ *var)
+{
+    return var->value;
+}
+
+#endif /* ETHR_GCC_ACQB_VERSIONS__ */
+
+#endif /* ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ */
+
+#endif /* ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ */
+
+/*
+ * add_return()
+ */
+#if (ETHR_HAVE___atomic_add_fetch & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if (ETHR_GCC_RELAXED_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_RETURN 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(add_return)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
+{
+    return __atomic_add_fetch(&var->value, incr, __ATOMIC_RELAXED);
 }
 
-#endif /* ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ */
+#endif /* ETHR_GCC_RELAXED_MOD_VERSIONS__ */
 
-#if defined(ETHR_HAVE___SYNC_ADD_AND_FETCH)
+#if (ETHR_GCC_ACQB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_ACQB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_RETURN_ACQB 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(add_return_acqb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
+{
+    return __atomic_add_fetch(&var->value, incr, __ATOMIC_ACQUIRE);
+}
+
+#endif /* ETHR_GCC_ACQB_MOD_VERSIONS__ */
+
+#if (ETHR_GCC_RELB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_RELB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_RETURN_RELB 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(add_return_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
+{
+    return __atomic_add_fetch(&var->value, incr, __ATOMIC_RELEASE);
+}
+
+#endif /* ETHR_GCC_RELB_MOD_VERSIONS__ */
+
+#endif /* ETHR_HAVE___atomic_add_fetch */
+
+#if ((ETHR_HAVE___sync_add_and_fetch & ETHR_INCLUDE_ATOMIC_IMPL__) \
+     & ETHR_GCC_MB_MOD_VERSIONS__)
 
 #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
 #  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_MB 1
@@ -147,12 +331,68 @@ ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var)
 static ETHR_INLINE ETHR_AINT_T__
 ETHR_NATMC_FUNC__(add_return_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
 {
-    return __sync_add_and_fetch(&var->counter, incr);
+    return __sync_add_and_fetch(&var->value, incr);
+}
+
+#endif /* ETHR_HAVE___sync_add_and_fetch */
+
+/*
+ * and_retold()
+ */
+#if (ETHR_HAVE___atomic_fetch_and & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if (ETHR_GCC_RELAXED_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_AND_RETOLD 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_AND_RETOLD 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(and_retold)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
+{
+    return __atomic_fetch_and(&var->value, mask, __ATOMIC_RELAXED);
+}
+
+#endif /* ETHR_GCC_RELAXED_MOD_VERSIONS__ */
+
+#if (ETHR_GCC_ACQB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_AND_RETOLD_ACQB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_AND_RETOLD_ACQB 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(and_retold_acqb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
+{
+    return __atomic_fetch_and(&var->value, mask, __ATOMIC_ACQUIRE);
 }
 
+#endif /* ETHR_GCC_ACQB_MOD_VERSIONS__ */
+
+#if (ETHR_GCC_RELB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_AND_RETOLD_RELB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_AND_RETOLD_RELB 1
 #endif
 
-#if defined(ETHR_HAVE___SYNC_FETCH_AND_AND)
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(and_retold_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
+{
+    return __atomic_fetch_and(&var->value, mask, __ATOMIC_RELEASE);
+}
+
+#endif /* ETHR_GCC_RELB_MOD_VERSIONS__ */
+
+#endif /* ETHR_HAVE___atomic_fetch_and */
+
+#if ((ETHR_HAVE___sync_fetch_and_and & ETHR_INCLUDE_ATOMIC_IMPL__) \
+     & ETHR_GCC_MB_MOD_VERSIONS__)
 
 #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
 #  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_AND_RETOLD_MB 1
@@ -163,12 +403,68 @@ ETHR_NATMC_FUNC__(add_return_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr)
 static ETHR_INLINE ETHR_AINT_T__
 ETHR_NATMC_FUNC__(and_retold_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
 {
-    return __sync_fetch_and_and(&var->counter, mask);
+    return __sync_fetch_and_and(&var->value, mask);
+}
+
+#endif /* ETHR_HAVE___sync_fetch_and_and */
+
+/*
+ * or_retold()
+ */
+#if (ETHR_HAVE___atomic_fetch_or & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if (ETHR_GCC_RELAXED_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_OR_RETOLD 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_OR_RETOLD 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(or_retold)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
+{
+    return __atomic_fetch_or(&var->value, mask, __ATOMIC_RELAXED);
+}
+
+#endif /* ETHR_GCC_RELAXED_MOD_VERSIONS__ */
+
+#if (ETHR_GCC_ACQB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_OR_RETOLD_ACQB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_OR_RETOLD_ACQB 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(or_retold_acqb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
+{
+    return __atomic_fetch_or(&var->value, mask, __ATOMIC_ACQUIRE);
 }
 
+#endif /* ETHR_GCC_ACQB_MOD_VERSIONS__ */
+
+#if (ETHR_GCC_RELB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_OR_RETOLD_RELB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_OR_RETOLD_RELB 1
 #endif
 
-#if defined(ETHR_HAVE___SYNC_FETCH_AND_OR)
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(or_retold_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
+{
+    return __atomic_fetch_or(&var->value, mask, __ATOMIC_RELEASE);
+}
+
+#endif /* ETHR_GCC_RELB_MOD_VERSIONS__ */
+
+#endif /* ETHR_HAVE___atomic_fetch_or */
+
+#if ((ETHR_HAVE___sync_fetch_and_or & ETHR_INCLUDE_ATOMIC_IMPL__) \
+     & ETHR_GCC_MB_MOD_VERSIONS__)
 
 #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
 #  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_OR_RETOLD_MB 1
@@ -179,11 +475,73 @@ ETHR_NATMC_FUNC__(and_retold_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
 static ETHR_INLINE ETHR_AINT_T__
 ETHR_NATMC_FUNC__(or_retold_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask)
 {
-    return (ETHR_AINT_T__) __sync_fetch_and_or(&var->counter, mask);
+    return (ETHR_AINT_T__) __sync_fetch_and_or(&var->value, mask);
+}
+
+#endif /* ETHR_HAVE___sync_fetch_and_or */
+
+/*
+ * cmpxchg()
+ */
+#if (ETHR_HAVE___atomic_compare_exchange_n & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if (ETHR_GCC_RELAXED_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_CMPXCHG 1
+#endif
+
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(cmpxchg)(ETHR_ATMC_T__ *var,
+			   ETHR_AINT_T__ new,
+			   ETHR_AINT_T__ exp)
+{
+    ETHR_AINT_T__ xchg = exp;
+    if (__atomic_compare_exchange_n(&var->value,
+				    &xchg,
+				    new,
+				    0, /* No spurious failures, please */
+				    __ATOMIC_RELAXED,
+				    __ATOMIC_RELAXED))
+	return exp;
+    return xchg;
 }
 
+#endif /* ETHR_GCC_RELAXED_MOD_VERSIONS__ */
+
+#if (ETHR_GCC_ACQB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__)
+
+#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG_ACQB 1
+#else
+#  define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_CMPXCHG_ACQB 1
 #endif
 
+static ETHR_INLINE ETHR_AINT_T__
+ETHR_NATMC_FUNC__(cmpxchg_acqb)(ETHR_ATMC_T__ *var,
+				ETHR_AINT_T__ new,
+				ETHR_AINT_T__ exp)
+{
+    ETHR_AINT_T__ xchg = exp;
+    if (__atomic_compare_exchange_n(&var->value,
+				    &xchg,
+				    new,
+				    0, /* No spurious failures, please */
+				    __ATOMIC_ACQUIRE,
+				    __ATOMIC_ACQUIRE))
+	return exp;
+    return xchg;
+}
+
+#endif /* ETHR_GCC_ACQB_MOD_VERSIONS__ */
+
+#endif /* ETHR_HAVE___atomic_compare_exchange_n */
+
+#if ((ETHR_HAVE___sync_val_compare_and_swap & ETHR_INCLUDE_ATOMIC_IMPL__) \
+     & ETHR_GCC_MB_MOD_VERSIONS__)
+
 #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4
 #  define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG_MB 1
 #else
@@ -195,17 +553,16 @@ ETHR_NATMC_FUNC__(cmpxchg_mb)(ETHR_ATMC_T__ *var,
 			      ETHR_AINT_T__ new,
 			      ETHR_AINT_T__ old)
 {
-    return __sync_val_compare_and_swap(&var->counter, old, new);
+    return __sync_val_compare_and_swap(&var->value, old, new);
 }
 
+#endif /* ETHR_HAVE___sync_val_compare_and_swap */
+
 #endif /* ETHR_TRY_INLINE_FUNCS */
 
 #undef ETHR_NATMC_FUNC__
 #undef ETHR_ATMC_T__
 #undef ETHR_AINT_T__
 #undef ETHR_AINT_SUFFIX__
-#undef ETHR_HAVE___SYNC_ADD_AND_FETCH
-#undef ETHR_HAVE___SYNC_FETCH_AND_AND
-#undef ETHR_HAVE___SYNC_FETCH_AND_OR
 
 #endif
diff --git a/erts/include/internal/gcc/ethr_dw_atomic.h b/erts/include/internal/gcc/ethr_dw_atomic.h
index 6736f9c547..c2c8f85b7b 100644
--- a/erts/include/internal/gcc/ethr_dw_atomic.h
+++ b/erts/include/internal/gcc/ethr_dw_atomic.h
@@ -1,7 +1,7 @@
 /*
  * %CopyrightBegin%
  *
- * Copyright Ericsson AB 2011. All Rights Reserved.
+ * Copyright Ericsson AB 2011-2015. All Rights Reserved.
  *
  * The contents of this file are subject to the Erlang Public License,
  * Version 1.1, (the "License"); you may not use this file except in
@@ -18,35 +18,39 @@
  */
 
 /*
- * Description: Native double word atomics using gcc's builtins
+ * Description: Native double word atomics using gcc's __atomic
+ *              and __sync builtins
  * Author: Rickard Green
+ *
+ * Note: The C11 memory model implemented by gcc's __atomic
+ *       builtins does not match the ethread API very well.
+ *
+ *       Due to this we cannot use the __ATOMIC_SEQ_CST
+ *       memory model. For more information see the comment
+ *       in the begining of ethr_membar.h in this directory.
  */
 
 #undef ETHR_INCLUDE_DW_ATOMIC_IMPL__
-#ifndef ETHR_GCC_DW_ATOMIC_H__
-#  define ETHR_GCC_DW_ATOMIC_H__
-#  if ((ETHR_SIZEOF_PTR == 4 \
-        && defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64)) \
-       || (ETHR_SIZEOF_PTR == 8 \
-           && defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP128) \
-	   && defined(ETHR_HAVE_INT128_T)))
-#    define ETHR_INCLUDE_DW_ATOMIC_IMPL__
-#  endif
+#if !defined(ETHR_GCC_ATOMIC_DW_ATOMIC_H__)				\
+    && ((ETHR_HAVE___sync_val_compare_and_swap & (2*ETHR_SIZEOF_PTR))	\
+	|| (ETHR_HAVE___atomic_compare_exchange_n & (2*ETHR_SIZEOF_PTR)))
+#  define ETHR_GCC_ATOMIC_DW_ATOMIC_H__
+#  define ETHR_INCLUDE_DW_ATOMIC_IMPL__
 #endif
 
 #ifdef ETHR_INCLUDE_DW_ATOMIC_IMPL__
 #  define ETHR_HAVE_NATIVE_SU_DW_ATOMIC
-#  define ETHR_NATIVE_DW_ATOMIC_IMPL "gcc"
 
-#  if defined(__i386__) || defined(__x86_64__)
-/*
- * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it will be used
- * at runtime in order to determine if native or fallback implementation
- * should be used.
- */
-#    define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \
-       ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__
-#  endif
+#if ((ETHR_HAVE___sync_val_compare_and_swap & (2*ETHR_SIZEOF_PTR))	\
+     && (ETHR_HAVE___atomic_compare_exchange_n & (2*ETHR_SIZEOF_PTR)))
+#  define ETHR_NATIVE_DW_ATOMIC_IMPL "gcc_atomic_and_sync_builtins"
+#elif (ETHR_HAVE___atomic_compare_exchange_n & (2*ETHR_SIZEOF_PTR))
+#  define ETHR_NATIVE_DW_ATOMIC_IMPL "gcc_atomic_builtins"
+#elif (ETHR_HAVE___sync_val_compare_and_swap & (2*ETHR_SIZEOF_PTR))
+#  define ETHR_NATIVE_DW_ATOMIC_IMPL "gcc_sync_builtins"
+#else
+#  error "!?"
+#endif
 
 #  if ETHR_SIZEOF_PTR == 4
 #    define ETHR_DW_NATMC_ALIGN_MASK__ 0x7
@@ -89,15 +93,138 @@ typedef union {
 #    define ETHR_DW_DBG_ALIGNED__(PTR)
 #  endif
 
-#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR
+
+#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR 1
+
 static ETHR_INLINE ethr_sint_t *
 ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var)
 {
     return (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var);
 }
 
+#if (ETHR_HAVE___atomic_store_n & (2*ETHR_SIZEOF_PTR))
+
+#if (ETHR_GCC_RELAXED_VERSIONS__ & (2*ETHR_SIZEOF_PTR))
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET 1
+
+static ETHR_INLINE void
+ethr_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
+			     ETHR_NATIVE_SU_DW_SINT_T value)
+{
+    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+    ETHR_DW_DBG_ALIGNED__(p);
+    __atomic_store_n(p, value, __ATOMIC_RELAXED);
+}
+
+#endif /* ETHR_GCC_RELAXED_VERSIONS__ */
+
+#if (ETHR_GCC_RELB_VERSIONS__ & (2*ETHR_SIZEOF_PTR))
 
-#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_CMPXCHG_MB
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET_RELB 1
+
+static ETHR_INLINE void
+ethr_native_su_dw_atomic_set_relb(ethr_native_dw_atomic_t *var,
+				  ETHR_NATIVE_SU_DW_SINT_T value)
+{
+    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+    ETHR_DW_DBG_ALIGNED__(p);
+    __atomic_store_n(p, value, __ATOMIC_RELEASE);
+}
+
+#endif /* ETHR_GCC_RELB_VERSIONS__ */
+
+#endif /* ETHR_HAVE___atomic_store_n */
+
+#if (ETHR_HAVE___atomic_load_n & (2*ETHR_SIZEOF_PTR))
+
+#if (ETHR_GCC_RELAXED_VERSIONS__ & (2*ETHR_SIZEOF_PTR))
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ 1
+
+static ETHR_INLINE ETHR_NATIVE_SU_DW_SINT_T
+ethr_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
+{
+    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+    ETHR_DW_DBG_ALIGNED__(p);
+    return __atomic_load_n(p, __ATOMIC_RELAXED);
+}
+
+#endif /* ETHR_GCC_RELAXED_VERSIONS__ */
+
+#if ((ETHR_GCC_ACQB_VERSIONS__ & (2*ETHR_SIZEOF_PTR))	\
+     & ~ETHR___atomic_load_ACQUIRE_barrier_bug)
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ_ACQB 1
+
+static ETHR_INLINE ETHR_NATIVE_SU_DW_SINT_T
+ethr_native_su_dw_atomic_read_acqb(ethr_native_dw_atomic_t *var)
+{
+    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+    ETHR_DW_DBG_ALIGNED__(p);
+    return __atomic_load_n(p, __ATOMIC_ACQUIRE);
+}
+
+#endif /* ETHR_GCC_ACQB_VERSIONS__ */
+
+#endif /* ETHR_HAVE___atomic_load_n */
+
+#if (ETHR_HAVE___atomic_compare_exchange_n & (2*ETHR_SIZEOF_PTR))
+
+#if (ETHR_GCC_RELAXED_MOD_VERSIONS__ & (2*ETHR_SIZEOF_PTR))
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_CMPXCHG 1
+
+static ETHR_INLINE ETHR_NATIVE_SU_DW_SINT_T
+ethr_native_su_dw_atomic_cmpxchg(ethr_native_dw_atomic_t *var,
+				 ETHR_NATIVE_SU_DW_SINT_T new,
+				 ETHR_NATIVE_SU_DW_SINT_T exp)
+{
+    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+    ETHR_NATIVE_SU_DW_SINT_T xchg = exp;
+    ETHR_DW_DBG_ALIGNED__(p);
+    if (__atomic_compare_exchange_n(p,
+				    &xchg,
+				    new,
+				    0,
+				    __ATOMIC_RELAXED,
+				    __ATOMIC_RELAXED))
+	return exp;
+    return xchg;
+}
+
+#endif /* ETHR_GCC_RELAXED_MOD_VERSIONS__ */
+
+#if (ETHR_GCC_ACQB_MOD_VERSIONS__ & (2*ETHR_SIZEOF_PTR))
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_CMPXCHG_ACQB 1
+
+static ETHR_INLINE ETHR_NATIVE_SU_DW_SINT_T
+ethr_native_su_dw_atomic_cmpxchg_acqb(ethr_native_dw_atomic_t *var,
+				      ETHR_NATIVE_SU_DW_SINT_T new,
+				      ETHR_NATIVE_SU_DW_SINT_T exp)
+{
+    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
+    ETHR_NATIVE_SU_DW_SINT_T xchg = exp;
+    ETHR_DW_DBG_ALIGNED__(p);
+    if (__atomic_compare_exchange_n(p,
+				    &xchg,
+				    new,
+				    0,
+				    __ATOMIC_ACQUIRE,
+				    __ATOMIC_ACQUIRE))
+	return exp;
+    return xchg;
+}
+
+#endif /* ETHR_GCC_ACQB_MOD_VERSIONS__ */
+
+#endif /* ETHR_HAVE___atomic_compare_exchange_n */
+
+#if ((ETHR_HAVE___sync_val_compare_and_swap & (2*ETHR_SIZEOF_PTR)) \
+     & ETHR_GCC_MB_MOD_VERSIONS__)
+
+#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_CMPXCHG_MB 1
 
 static ETHR_INLINE ETHR_NATIVE_SU_DW_SINT_T
 ethr_native_su_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var,
@@ -109,7 +236,8 @@ ethr_native_su_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var,
     return __sync_val_compare_and_swap(p, old, new);
 }
 
-#endif /* ETHR_TRY_INLINE_FUNCS */
+#endif /* ETHR_HAVE___sync_val_compare_and_swap */
 
-#endif /* ETHR_GCC_DW_ATOMIC_H__ */
+#endif /* ETHR_TRY_INLINE_FUNCS */
 
+#endif /* ETHR_INCLUDE_DW_ATOMIC_IMPL__ */
diff --git a/erts/include/internal/gcc/ethr_membar.h b/erts/include/internal/gcc/ethr_membar.h
index 7d428fc68e..d2d36907f3 100644
--- a/erts/include/internal/gcc/ethr_membar.h
+++ b/erts/include/internal/gcc/ethr_membar.h
@@ -1,7 +1,7 @@
 /*
  * %CopyrightBegin%
  *
- * Copyright Ericsson AB 2011. All Rights Reserved.
+ * Copyright Ericsson AB 2011-2015. All Rights Reserved.
  *
  * The contents of this file are subject to the Erlang Public License,
  * Version 1.1, (the "License"); you may not use this file except in
@@ -18,56 +18,196 @@
  */
 
 /*
- * Description: Memory barriers when using gcc's builtins
+ * Description: Memory barriers when using gcc's __atomic and
+ *              __sync builtins
  * Author: Rickard Green
+ *
+ * Note: The C11 memory model implemented by gcc's __atomic
+ *       builtins does not match the ethread API very well.
+ *
+ *       A function with a barrier postfix in the ethread atomic
+ *       API needs to ensure that all stores and loads are
+ *       ordered around it according to the semantics of the
+ *       barrier specified.
+ *
+ *       The C11 aproch is different. The __atomic builtins
+ *       API takes a memory model parameter. Assuming that all
+ *       memory syncronizations using the involved atomic
+ *       variables are made using this API, the synchronizations
+ *       will adhere to the memory models used. That is, you do
+ *       *not* know how loads and stores will be ordered around
+ *       a specific __atomic operation in the general case. You
+ *       only know the total effect of the combination of
+ *       operations issued will adhere to the model.
+ *
+ *       This limits how we can use the __atomic builtins. What
+ *       we cannot use:
+ *
+ *       1. We cannot rely on __atomic_thread_fence() to issue
+ *          any specific memory barriers at all. This regardless
+ *          of memory model parameter passed. That is, we cannot
+ *          use the __atomic_thread_fence() builtin at all.
+ *
+ *          Why is this? If all __atomic builtins accessing
+ *          memory issue memory barriers, __atomic_thread_fence()
+ *          does not have to issue memory barriers. The
+ *          implementation for the Itanium architecture is an
+ *          example of this. Even using the __ATOMIC_RELAXED
+ *          memory model all __atomic builtins accessing memory
+ *          will issue memory barriers. Due to this no memory
+ *          barriers at all will be issued by
+ *           __atomic_thread_fence() using either one of the
+ *          __ATOMIC_CONSUME, __ATOMIC_ACQUIRE, or
+ *          __ATOMIC_RELEASE memory models.
+ *
+ *       2. We cannot rely on any __atomic builtin with the
+ *          __ATOMIC_SEQ_CST memory model parameters to
+ *          issue any specific memory barriers. That is, we
+ *          cannot use these memory models at all.
+ *
+ *          Why is this? Since all synchronizations is expected
+ *          to be made using the __atomic builtins, memory
+ *          barriers only have to be issued by some of them,
+ *          and you do not know which ones wont issue memory
+ *          barriers.
+ *
+ *          One can easily be fooled into believing that when
+ *          using the __ATOMIC_SEQ_CST memory model on all
+ *          operations, all operations will issue full memory
+ *          barriers. This is however not the case. The
+ *          implementation for the x86_64 architecture is an
+ *          example of this. Since all operations except loads
+ *          issue full memory barriers, no memory barriers at
+ *          all is issued by loads. This could also be
+ *          implemented by issuing a full memory barrier on
+ *          loads, but no barrier at all on stores.
+ *
+ *       What can be used then?
+ *       1. All (legacy) __sync builtins implying full memory
+ *          barriers issued.
+ *       2. All __atomic builtins using the __ATOMIC_RELAXED
+ *          memory model can, of course, be used. This since
+ *          no ordering guarantees at all are made.
+ *       3. All __atomic builtins accessing memory using the
+ *          __ATOMIC_ACQUIRE and __ATOMIC_RELEASE memory
+ *          models. This since an __atomic builtin memory
+ *          access using the __ATOMIC_ACQUIRE must at least
+ *          issue an aquire memory barrier and an __atomic
+ *          builtin memory acess with the __ATOMIC_RELEASE
+ *          memory model must at least issue a release memory
+ *          barrier. Otherwise the two can not be paired.
+ *       4. All __atomic builtins accessing memory using the
+ *          __ATOMIC_CONSUME builtin can be used for the same
+ *          reason __ATOMIC_ACQUIRE can be used. The ethread
+ *          atomic framework implementing the ethread API
+ *          using native implementations does not expect the
+ *          native implementations to produce versions with
+ *          data dependent read barriers, so until the
+ *          framework is changed we haven't got any use for
+ *          for it.
+ *
+ *       For some architectures we have our own memory barrier
+ *       implementations. We prefer to use these since they
+ *       should be as fine grained as possible. For other
+ *       architectures we use the __sync_synchronize() builtin
+ *       which issue a full memory barrier. For these
+ *       architectures we have to assume that all loads and
+ *       stores can be reordered without limitation. That is,
+ *       unnecessary memory barriers will be issued if such
+ *       reordering actually cannot occur.
  */
 
-#ifndef ETHR_GCC_MEMBAR_H__
-#define ETHR_GCC_MEMBAR_H__
+/*
+ * We prefer to use our own memory barrier implementation if
+ * such exist instead of using __sync_synchronize()...
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#  include "../i386/ethr_membar.h"
+#elif defined(__sparc__)
+#  include "../sparc32/ethr_membar.h"
+#elif defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__)
+#  include "../ppc32/ethr_membar.h"
+#elif !defined(ETHR_GCC_ATOMIC_MEMBAR_H__)			\
+    && (ETHR_HAVE_GCC_ASM_ARM_DMB_INSTRUCTION			\
+	|| ETHR_HAVE___sync_synchronize				\
+	|| (ETHR_HAVE___sync_val_compare_and_swap & 12))
+#define ETHR_GCC_ATOMIC_MEMBAR_H__
 
 #define ETHR_LoadLoad	(1 << 0)
 #define ETHR_LoadStore	(1 << 1)
 #define ETHR_StoreLoad	(1 << 2)
 #define ETHR_StoreStore	(1 << 3)
 
+#define ETHR_COMPILER_BARRIER __asm__ __volatile__("" : : : "memory")
+
+#if ETHR_HAVE_GCC_ASM_ARM_DMB_INSTRUCTION
+
+static __inline__ __attribute__((__always_inline__)) void
+ethr_full_fence__(void)
+{
+    __asm__ __volatile__("dmb sy" : : : "memory");
+}
+
+static __inline__ __attribute__((__always_inline__)) void
+ethr_store_fence__(void)
+{
+    __asm__ __volatile__("dmb st" : : : "memory");
+}
+
+#define ETHR_MEMBAR(B) \
+ ETHR_CHOOSE_EXPR((B) == ETHR_StoreStore, ethr_store_fence__(), ethr_full_fence__())
+
+#elif ETHR_HAVE___sync_synchronize
+
+static __inline__ __attribute__((__always_inline__)) void
+ethr_full_fence__(void)
+{
+    /*
+     * The compiler barriers are here to fix missing clobbers
+     * in __sync_synchronize() when using buggy LLVM
+     * implementation of __sync_synchronize(). They
+     * do not introduce any unnecessary overhead when used
+     * here, so we use them for all systems.
+     */
+    ETHR_COMPILER_BARRIER;
+    __sync_synchronize();
+    ETHR_COMPILER_BARRIER;
+}
+
+#else /* !ETHR_HAVE___sync_synchronize */
+
 /*
- * According to the documentation __sync_synchronize() will
- * issue a full memory barrier. However, __sync_synchronize()
- * is known to erroneously be a noop on at least some
- * platforms with some gcc versions. This has suposedly been
- * fixed in some gcc version, but we don't know from which
- * version. Therefore, we only use it when it has been
- * verified to work. Otherwise we use the workaround
- * below.
+ * Buggy __sync_synchronize(); call __sync_val_compare_and_swap()
+ * instead which imply a full memory barrier (and hope that one
+ * isn't buggy too).
  */
 
-#if defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP32)
+#if (ETHR_HAVE___sync_val_compare_and_swap & 4)
 #  define ETHR_MB_T__ ethr_sint32_t
-#elif defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64)
+#elif (ETHR_HAVE___sync_val_compare_and_swap & 8)
 #  define ETHR_MB_T__ ethr_sint64_t
-#else
-#  error "No __sync_val_compare_and_swap"
 #endif
-#define ETHR_SYNC_SYNCHRONIZE_WORKAROUND__ \
-do { \
-    volatile ETHR_MB_T__ x___ = 0; \
-    (void) __sync_val_compare_and_swap(&x___, (ETHR_MB_T__) 0, (ETHR_MB_T__) 1); \
-} while (0)
 
-#define ETHR_COMPILER_BARRIER __asm__ __volatile__("" : : : "memory")
+static __inline__ __attribute__((__always_inline__)) void
+ethr_full_fence__(void)
+{
+    volatile ETHR_MB_T__ x = 0;
+    (void) __sync_val_compare_and_swap(&x, (ETHR_MB_T__) 0, (ETHR_MB_T__) 1);
+}
 
-#if defined(__mips__) && ETHR_AT_LEAST_GCC_VSN__(4, 2, 0)
-#  define ETHR_MEMBAR(B) __sync_synchronize()
-#  define ETHR_READ_DEPEND_MEMORY_BARRIER __sync_synchronize()
-#elif ((defined(__powerpc__) || defined(__ppc__)) \
-       && ETHR_AT_LEAST_GCC_VSN__(4, 1, 2))
-#  define ETHR_MEMBAR(B) __sync_synchronize()
-#else /* Use workaround */
-#  define ETHR_MEMBAR(B) \
-     ETHR_SYNC_SYNCHRONIZE_WORKAROUND__
-#  define ETHR_READ_DEPEND_MEMORY_BARRIER \
-     ETHR_SYNC_SYNCHRONIZE_WORKAROUND__
+#endif /* !ETHR_HAVE___sync_synchronize */
+
+#ifndef ETHR_MEMBAR
+#  define ETHR_MEMBAR(B) ethr_full_fence__()
 #endif
 
+/*
+ * Define ETHR_READ_DEPEND_MEMORY_BARRIER for all architechtures
+ * not known to order data dependent loads
+ */
+
+#if !defined(__ia64__) && !defined(__arm__)
+#  define ETHR_READ_DEPEND_MEMORY_BARRIER ETHR_MEMBAR(ETHR_LoadLoad)
+#endif
 
-#endif /* ETHR_GCC_MEMBAR_H__ */
+#endif /* ETHR_GCC_ATOMIC_MEMBAR_H__ */
diff --git a/erts/include/internal/gcc/ethread.h b/erts/include/internal/gcc/ethread.h
index 365a3535cf..be3e1da90e 100644
--- a/erts/include/internal/gcc/ethread.h
+++ b/erts/include/internal/gcc/ethread.h
@@ -1,7 +1,7 @@
 /*
  * %CopyrightBegin%
  *
- * Copyright Ericsson AB 2010-2011. All Rights Reserved.
+ * Copyright Ericsson AB 2010-2015. All Rights Reserved.
  *
  * The contents of this file are subject to the Erlang Public License,
  * Version 1.1, (the "License"); you may not use this file except in
@@ -18,20 +18,292 @@
  */
 
 /*
- * Description: Native atomic ethread support when using gcc
+ * Description: Native atomic ethread support when using gcc's __atomic
+ *              and __sync builtins
  * Author: Rickard Green
  */
 
-#ifndef ETHREAD_GCC_H__
-#define ETHREAD_GCC_H__
-
-#if defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP32) \
-     || defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64)
+#if !defined(ETHREAD_GCC_NATIVE_H__) && ETHR_GCC_COMPILER
+#define ETHREAD_GCC_NATIVE_H__
 
 #ifndef ETHR_MEMBAR
 #  include "ethr_membar.h"
 #endif
 
+#define ETHR_GCC_VERSIONS_MASK__ 28
+
+#undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__
+#undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__
+#undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__
+#undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__
+#undef ETHR_GCC_RELAXED_VERSIONS__
+#undef ETHR_GCC_RELAXED_MOD_VERSIONS__
+#undef ETHR_GCC_ACQB_VERSIONS__
+#undef ETHR_GCC_ACQB_MOD_VERSIONS__
+#undef ETHR_GCC_RELB_VERSIONS__
+#undef ETHR_GCC_RELB_MOD_VERSIONS__
+#undef ETHR_GCC_MB_MOD_VERSIONS__
+
+/*
+ * True GNU GCCs before version 4.8 do not emit a memory barrier
+ * after the load in the __atomic_load_n(_, __ATOMIC_ACQUIRE)
+ * case (which is needed on most architectures).
+ */
+#undef ETHR___atomic_load_ACQUIRE_barrier_bug
+#if ETHR_GCC_COMPILER != ETHR_GCC_COMPILER_TRUE
+/*
+ * A gcc compatible compiler. We have no information
+ * about the existence of this bug, but we assume
+ * that it is not impossible that it could have
+ * been "inherited". Therefore, until we are certain
+ * that the bug does not exist, we assume that it
+ * does.
+ */
+#  define ETHR___atomic_load_ACQUIRE_barrier_bug ETHR_GCC_VERSIONS_MASK__
+#elif !ETHR_AT_LEAST_GCC_VSN__(4, 8, 0)
+/* True gcc of version < 4.8, i.e., bug exist... */
+#  define ETHR___atomic_load_ACQUIRE_barrier_bug ETHR_GCC_VERSIONS_MASK__
+#else /* True gcc of version >= 4.8 */
+/*
+ * Sizes less than or equal to word size have been fixed,
+ * but double word size has not been fixed.
+ */
+#  if ETHR_SIZEOF_PTR == 8
+#    define ETHR___atomic_load_ACQUIRE_barrier_bug \
+    (~(8|4) & ETHR_GCC_VERSIONS_MASK__)
+#  elif ETHR_SIZEOF_PTR == 4
+#    define ETHR___atomic_load_ACQUIRE_barrier_bug \
+    (~4 & ETHR_GCC_VERSIONS_MASK__)
+#  else
+#    error word size not supported
+#  endif
+#endif
+
+#define ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ 0
+#define ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ 0
+#define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ 0
+#define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ 0
+#define ETHR_GCC_RELAXED_VERSIONS__ ETHR_GCC_VERSIONS_MASK__
+#define ETHR_GCC_RELAXED_MOD_VERSIONS__ ETHR_GCC_VERSIONS_MASK__
+
+#if ETHR_TRUST_GCC_ATOMIC_BUILTINS_MEMORY_BARRIERS
+#  define ETHR_GCC_ACQB_VERSIONS__ ETHR_GCC_VERSIONS_MASK__
+#  define ETHR_GCC_ACQB_MOD_VERSIONS__ ETHR_GCC_VERSIONS_MASK__
+#  define ETHR_GCC_RELB_VERSIONS__ ETHR_GCC_VERSIONS_MASK__
+#  define ETHR_GCC_RELB_MOD_VERSIONS__ ETHR_GCC_VERSIONS_MASK__
+#else
+/*
+ * This is currently the default (on most platforms) since
+ * we've seen too many memory barrier bugs produced by gcc...
+ */
+#  define ETHR_GCC_ACQB_VERSIONS__ 0
+#  define ETHR_GCC_ACQB_MOD_VERSIONS__ 0
+#  define ETHR_GCC_RELB_VERSIONS__ 0
+#  define ETHR_GCC_RELB_MOD_VERSIONS__ 0
+#endif
+/*
+ * In the general case we do not want any full barrier versions
+ * if we can implement more relaxed ones (using __atomic_* builtins).
+ * This since the implementations normally need extra memory barrier
+ * instructions to implement these. The x86/x86_64 implementations
+ * are an exception see below.
+ */
+#define ETHR_GCC_MB_MOD_VERSIONS__ \
+    (ETHR_GCC_VERSIONS_MASK__ & ~ETHR_HAVE___atomic_compare_exchange_n)
+
+#if ETHR_SIZEOF_PTR == 8
+#  define ETHR_GCC_VOLATILE_BIT_MASK__ 12
+#elif ETHR_SIZEOF_PTR == 4
+#  define ETHR_GCC_VOLATILE_BIT_MASK__ 4
+#endif
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__sparc__)	\
+    || defined(__powerpc__) || defined(__ppc__) || defined(__mips__)	\
+    || defined(__alpha__) || defined(__ia64__)
+
+/*
+ * Aligned volatile stores and loads of data smaller
+ * than or equal to word size are atomic...
+ */
+#  undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__
+#  define ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ ETHR_GCC_VOLATILE_BIT_MASK__
+#  undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__
+#  define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ ETHR_GCC_VOLATILE_BIT_MASK__
+
+#elif defined(__arm__)
+
+/* volatile stores are problematic on some machines... */
+#  undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__
+#  define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ ETHR_GCC_VOLATILE_BIT_MASK__
+
+#endif
+
+#if defined(__ia64__)
+
+/* Volatile stores produce stores with release barriers. */
+#  undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__
+#  define ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ ETHR_GCC_VOLATILE_BIT_MASK__
+
+/* Volatile loads produce loads with acquire barrier. */
+#  undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__
+#  define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ ETHR_GCC_VOLATILE_BIT_MASK__
+
+/*
+ * We trust gcc to produce acquire/release barriers on itanium.
+ * Since all atomic ops also have at least acquire or release
+ * barriers (also when passed the relaxed memory model) it
+ * would be very inefficient not to use these as native
+ * barriers on Itanium.
+ */
+#  undef ETHR_GCC_ACQB_VERSIONS__
+#  define ETHR_GCC_ACQB_VERSIONS__ ETHR_GCC_VERSIONS_MASK__
+#  undef ETHR_GCC_ACQB_MOD_VERSIONS__
+#  define ETHR_GCC_ACQB_MOD_VERSIONS__ ETHR_GCC_VERSIONS_MASK__
+#  undef ETHR_GCC_RELB_VERSIONS__
+#  define ETHR_GCC_RELB_VERSIONS__ ETHR_GCC_VERSIONS_MASK__
+#  undef ETHR_GCC_RELB_MOD_VERSIONS__
+#  define ETHR_GCC_RELB_MOD_VERSIONS__ ETHR_GCC_VERSIONS_MASK__
+
+/*
+ * Itanium is not effected by the load acquire
+ * bug since the barrier is part of the instruction
+ * on Itanium (ld.acq), and not a separate instruction
+ * as on most platforms.
+ */
+#  undef ETHR___atomic_load_ACQUIRE_barrier_bug
+#  define ETHR___atomic_load_ACQUIRE_barrier_bug 0
+
+/*
+ * No point exposing relaxed versions since they are
+ * implemended using either acquire or release
+ * barriers.
+ */
+#  undef ETHR_GCC_RELAXED_VERSIONS__
+#  define ETHR_GCC_RELAXED_VERSIONS__ 0
+
+/* #endif defined(__ia64__) */
+#elif defined(__i386__) || defined(__x86_64__)
+
+/*
+ * Want full barrier versions of all modification
+ * operations since all of these are implemented
+ * using locked instructions implying full memory
+ * barriers.
+ */
+#  undef ETHR_GCC_MB_MOD_VERSIONS__
+#  define ETHR_GCC_MB_MOD_VERSIONS__ ETHR_HAVE___sync_val_compare_and_swap
+
+/*
+ * No point exposing acquire/release versions
+ * when we got full memory barrier versions
+ * of modification operations since all of these
+ * are implemented using locked instructions
+ * implying full memory barriers.
+ */
+#  if ETHR_GCC_ACQB_MOD_VERSIONS__
+#    undef ETHR_GCC_ACQB_MOD_VERSIONS__
+#    define ETHR_GCC_ACQB_MOD_VERSIONS__ \
+    (ETHR_GCC_VERSIONS_MASK__ & ~ETHR_HAVE___sync_val_compare_and_swap)
+#  endif
+#  if ETHR_GCC_RELB_MOD_VERSIONS__
+#    undef ETHR_GCC_RELB_MOD_VERSIONS__
+#    define ETHR_GCC_RELB_MOD_VERSIONS__ \
+    (ETHR_GCC_VERSIONS_MASK__ & ~ETHR_HAVE___sync_val_compare_and_swap)
+#  endif
+
+#  ifdef ETHR_X86_OUT_OF_ORDER
+
+/* See above... */
+#    undef ETHR_GCC_RELAXED_MOD_VERSIONS__
+#    define ETHR_GCC_RELAXED_MOD_VERSIONS__ 0
+
+#  else /* !ETHR_X86_OUT_OF_ORDER, i.e., we don't use any x86-OOO instructions... */
+
+/*
+ * Not effected by the load acquire barrier bug,
+ * since no barrier at all is needed for a load
+ * acquire...
+ */
+#    undef ETHR___atomic_load_ACQUIRE_barrier_bug
+#    define ETHR___atomic_load_ACQUIRE_barrier_bug 0
+
+/* Stores imply release barriers semantics. */
+#    undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__
+#    define ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ ETHR_GCC_VOLATILE_BIT_MASK__
+
+/* Loads imply acquire barrier semantics. */
+#    undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__
+#    define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ ETHR_GCC_VOLATILE_BIT_MASK__
+
+/*
+ * Trust load acquire and store release for sizes
+ * where volatile operation implies these barrier
+ * semantics since no barriers are needed.
+ */
+#    if !ETHR_GCC_ACQB_VERSIONS__
+#      undef ETHR_GCC_ACQB_VERSIONS__
+#      define ETHR_GCC_ACQB_VERSIONS__ ETHR_GCC_VOLATILE_BIT_MASK__
+#    endif
+#    if !ETHR_GCC_RELB_VERSIONS__
+#      undef ETHR_GCC_RELB_VERSIONS__
+#      define ETHR_GCC_RELB_VERSIONS__ ETHR_GCC_VOLATILE_BIT_MASK__
+#    endif
+
+/*
+ * No point exposing relaxed versions at all since
+ * all mod operations are implemented with locked
+ * instructions implying full memory barriers and
+ * volatile store and load imply release and
+ * acquire barrier semantics.
+ */
+#    undef ETHR_GCC_RELAXED_VERSIONS__
+#    define ETHR_GCC_RELAXED_VERSIONS__ 0
+
+#  endif /* !ETHR_X86_OUT_OF_ORDER */
+
+/* #endif defined(__i386__) || defined(__x86_64__) */
+#elif defined(__powerpc__) || defined(__ppc__)
+
+#  if !defined(ETHR_PPC_HAVE_LWSYNC)
+/*
+ * Release barriers are typically implemented using
+ * the lwsync instruction. We want our runtime
+ * configure test to determine if the lwsync
+ * instruction is available on the system or not
+ * before we use it. Therefore, do not implement any
+ * native ops using the __ATOMIC_RELEASE model.
+ */
+#    undef ETHR_GCC_RELB_VERSIONS__
+#    define ETHR_GCC_RELB_VERSIONS__ 0
+#    if defined(ETHR_GCC_IMPLEMENT_ACQB_USING_LWSYNC)
+/*
+ * Acquire barriers are usually implemented by other means
+ * than lwsync, but can be implemented using lwsync. Define
+ * ETHR_GCC_IMPLEMENT_ACQB_USING_LWSYNC if acquire barriers
+ * are implemented using lwsync.
+ */
+#      undef ETHR_GCC_ACQB_VERSIONS__
+#      define ETHR_GCC_ACQB_VERSIONS__ 0
+#    endif
+#  endif
+
+#endif /* defined(__powerpc__) || defined(__ppc__) */
+
+#if !ETHR_GCC_RELAXED_VERSIONS__
+#  undef ETHR_GCC_RELAXED_MOD_VERSIONS__
+#  define ETHR_GCC_RELAXED_MOD_VERSIONS__ 0
+#endif
+
+#if !ETHR_GCC_ACQB_VERSIONS__
+#  undef ETHR_GCC_ACQB_MOD_VERSIONS__
+#  define ETHR_GCC_ACQB_MOD_VERSIONS__ 0
+#endif
+
+#if !ETHR_GCC_RELB_VERSIONS__
+#  undef ETHR_GCC_RELB_MOD_VERSIONS__
+#  define ETHR_GCC_RELB_MOD_VERSIONS__ 0
+#endif
+
 #if !defined(ETHR_HAVE_NATIVE_ATOMIC32)
 #  define ETHR_ATOMIC_WANT_32BIT_IMPL__
 #  include "ethr_atomic.h"
@@ -42,12 +314,51 @@
 #  include "ethr_atomic.h"
 #endif
 
+#if defined(__x86_64__)
+/*
+ * No instructions available for native implementation
+ * of these for dw-atomics...
+ */
+#  undef ETHR_GCC_RELAXED_VERSIONS__
+#  define ETHR_GCC_RELAXED_VERSIONS__ 0
+#  undef ETHR_GCC_ACQB_VERSIONS__
+#  define ETHR_GCC_ACQB_VERSIONS__ 0
+#  undef ETHR_GCC_RELB_VERSIONS__
+#  define ETHR_GCC_RELB_VERSIONS__ 0
+#endif
+
+#if !ETHR_GCC_RELAXED_VERSIONS__
+#  undef ETHR_GCC_RELAXED_MOD_VERSIONS__
+#  define ETHR_GCC_RELAXED_MOD_VERSIONS__ 0
+#endif
+
+#if !ETHR_GCC_ACQB_VERSIONS__
+#  undef ETHR_GCC_ACQB_MOD_VERSIONS__
+#  define ETHR_GCC_ACQB_MOD_VERSIONS__ 0
+#endif
+
+#if !ETHR_GCC_RELB_VERSIONS__
+#  undef ETHR_GCC_RELB_MOD_VERSIONS__
+#  define ETHR_GCC_RELB_MOD_VERSIONS__ 0
+#endif
+
 #if (!defined(ETHR_HAVE_NATIVE_DW_ATOMIC) \
      && !(ETHR_SIZEOF_PTR == 4 && defined(ETHR_HAVE_NATIVE_ATOMIC64)) \
      && !(ETHR_SIZEOF_PTR == 8 && defined(ETHR_HAVE_NATIVE_ATOMIC128)))
 #  include "ethr_dw_atomic.h"
 #endif
 
-#endif
+#undef ETHR___atomic_load_ACQUIRE_barrier_bug
+#undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__
+#undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__
+#undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__
+#undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__
+#undef ETHR_GCC_RELAXED_VERSIONS__
+#undef ETHR_GCC_RELB_VERSIONS__
+#undef ETHR_GCC_RELB_VERSIONS__
+#undef ETHR_GCC_RELAXED_MOD_VERSIONS__
+#undef ETHR_GCC_ACQB_MOD_VERSIONS__
+#undef ETHR_GCC_RELB_MOD_VERSIONS__
+#undef ETHR_GCC_MB_MOD_VERSIONS__
 
-#endif
+#endif /* ETHREAD_GCC_NATIVE_H__ */
author	Rickard Green <[email protected]>	2015-01-05 11:04:34 +0100
committer	Rickard Green <[email protected]>	2015-01-14 20:24:45 +0100
commit	24fa075b5c0d54f2035a2ff510a82aa19187eda4 (patch)
tree	2e26371bbcf360ae53f75c6bad1799aa375c1b72 /erts/include
parent	ce73c38b10d1dee5209b505ef054b108e747b522 (diff)
download	otp-24fa075b5c0d54f2035a2ff510a82aa19187eda4.tar.gz otp-24fa075b5c0d54f2035a2ff510a82aa19187eda4.tar.bz2 otp-24fa075b5c0d54f2035a2ff510a82aa19187eda4.zip