diff options
Diffstat (limited to 'erts')
-rw-r--r-- | erts/aclocal.m4 | 304 | ||||
-rw-r--r-- | erts/configure.in | 25 | ||||
-rw-r--r-- | erts/include/internal/ethread.h | 5 | ||||
-rw-r--r-- | erts/include/internal/ethread_header_config.h.in | 68 | ||||
-rw-r--r-- | erts/include/internal/ethread_inline.h | 23 | ||||
-rw-r--r-- | erts/include/internal/gcc/ethr_atomic.h | 477 | ||||
-rw-r--r-- | erts/include/internal/gcc/ethr_dw_atomic.h | 178 | ||||
-rw-r--r-- | erts/include/internal/gcc/ethr_membar.h | 208 | ||||
-rw-r--r-- | erts/include/internal/gcc/ethread.h | 329 |
9 files changed, 1404 insertions, 213 deletions
diff --git a/erts/aclocal.m4 b/erts/aclocal.m4 index f2c6b7ccd0..b2d30449cd 100644 --- a/erts/aclocal.m4 +++ b/erts/aclocal.m4 @@ -1,7 +1,7 @@ dnl dnl %CopyrightBegin% dnl -dnl Copyright Ericsson AB 1998-2013. All Rights Reserved. +dnl Copyright Ericsson AB 1998-2015. All Rights Reserved. dnl dnl The contents of this file are subject to the Erlang Public License, dnl Version 1.1, (the "License"); you may not use this file except in @@ -1019,24 +1019,226 @@ AC_SUBST(ERTS_INTERNAL_X_LIBS) ]) -AC_DEFUN(ETHR_CHK_SYNC_OP, +AC_DEFUN(ETHR_CHK_GCC_ATOMIC_OP__, [ - AC_MSG_CHECKING([for $3-bit $1()]) - case "$2" in - "1") sync_call="$1(&var);";; - "2") sync_call="$1(&var, ($4) 0);";; - "3") sync_call="$1(&var, ($4) 0, ($4) 0);";; + # $1 - atomic_op + + for atomic_bit_size in 32 64 128; do + case $atomic_bit_size in + 32) gcc_atomic_type="$gcc_atomic_type32";; + 64) gcc_atomic_type="$gcc_atomic_type64";; + 128) gcc_atomic_type="$gcc_atomic_type128";; + esac + gcc_atomic_lockfree="int x[[(2*__atomic_always_lock_free(sizeof($gcc_atomic_type), 0))-1]]" + case $1 in + __sync_add_and_fetch | __sync_fetch_and_and | __sync_fetch_and_or) + atomic_call="volatile $gcc_atomic_type var; $gcc_atomic_type res = $1(&var, ($gcc_atomic_type) 0);" + ;; + __sync_val_compare_and_swap) + atomic_call="volatile $gcc_atomic_type var; $gcc_atomic_type res = $1(&var, ($gcc_atomic_type) 0, ($gcc_atomic_type) 0);" + ;; + __atomic_store_n) + atomic_call="$gcc_atomic_lockfree; volatile $gcc_atomic_type var; $1(&var, ($gcc_atomic_type) 0, __ATOMIC_RELAXED); $1(&var, ($gcc_atomic_type) 0, __ATOMIC_RELEASE);" + ;; + __atomic_load_n) + atomic_call="$gcc_atomic_lockfree; volatile $gcc_atomic_type var; $gcc_atomic_type res = $1(&var, __ATOMIC_RELAXED); res = $1(&var, __ATOMIC_ACQUIRE);" + ;; + __atomic_add_fetch| __atomic_fetch_and | __atomic_fetch_or) + atomic_call="$gcc_atomic_lockfree; volatile $gcc_atomic_type var; $gcc_atomic_type res = $1(&var, ($gcc_atomic_type) 0, __ATOMIC_RELAXED); res = $1(&var, ($gcc_atomic_type) 0, __ATOMIC_ACQUIRE); res = $1(&var, ($gcc_atomic_type) 0, __ATOMIC_RELEASE);" + ;; + __atomic_compare_exchange_n) + atomic_call="$gcc_atomic_lockfree; volatile $gcc_atomic_type var; $gcc_atomic_type val; int res = $1(&var, &val, ($gcc_atomic_type) 0, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); res = $1(&var, &val, ($gcc_atomic_type) 0, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);" + ;; + *) + AC_MSG_ERROR([Internal error: missing implementation for $1]) + ;; + esac + eval atomic${atomic_bit_size}_call=\"$atomic_call\" + done + + AC_CACHE_CHECK([for 32-bit $1()], ethr_cv_32bit_$1, + [ + ethr_cv_32bit_$1=no + AC_TRY_LINK([], [$atomic32_call], [ethr_cv_32bit_$1=yes]) + ]) + AC_CACHE_CHECK([for 64-bit $1()], ethr_cv_64bit_$1, + [ + ethr_cv_64bit_$1=no + AC_TRY_LINK([], [$atomic64_call], [ethr_cv_64bit_$1=yes]) + ]) + AC_CACHE_CHECK([for 128-bit $1()], ethr_cv_128bit_$1, + [ + ethr_cv_128bit_$1=no + AC_TRY_LINK([], [$atomic128_call], [ethr_cv_128bit_$1=yes]) + ]) + + case $ethr_cv_128bit_$1-$ethr_cv_64bit_$1-$ethr_cv_32bit_$1 in + no-no-no) + have_atomic_ops=0;; + no-no-yes) + have_atomic_ops=4;; + no-yes-no) + have_atomic_ops=8;; + no-yes-yes) + have_atomic_ops=12;; + yes-no-no) + have_atomic_ops=16;; + yes-no-yes) + have_atomic_ops=20;; + yes-yes-no) + have_atomic_ops=24;; + yes-yes-yes) + have_atomic_ops=28;; + esac + AC_DEFINE_UNQUOTED([ETHR_HAVE_$1], [$have_atomic_ops], [Define as a bitmask corresponding to the word sizes that $1() can handle on your system]) +]) + +AC_DEFUN(ETHR_CHK_IF_NOOP, +[ + ethr_test_filename="chk_if_$1$3_noop_config1test.$$" + cat > "${ethr_test_filename}.c" <<EOF +int +my_test(void) +{ + $1$2; + return 0; +} +EOF + $CC -O3 $ETHR_DEFS -c "${ethr_test_filename}.c" -o "${ethr_test_filename}1.o" + cat > "${ethr_test_filename}.c" <<EOF +int +my_test(void) +{ + ; + return 0; +} +EOF + $CC -O3 $ETHR_DEFS -c "${ethr_test_filename}.c" -o "${ethr_test_filename}2.o" + if diff "${ethr_test_filename}1.o" "${ethr_test_filename}2.o" >/dev/null 2>&1; then + ethr_$1$3_noop=yes + else + ethr_$1$3_noop=no + fi + rm -f "${ethr_test_filename}.c" "${ethr_test_filename}1.o" "${ethr_test_filename}2.o" +]) + +AC_DEFUN(ETHR_CHK_GCC_ATOMIC_OPS, +[ + AC_CHECK_SIZEOF(short) + AC_CHECK_SIZEOF(int) + AC_CHECK_SIZEOF(long) + AC_CHECK_SIZEOF(long long) + AC_CHECK_SIZEOF(__int128_t) + + if test "$ac_cv_sizeof_short" = "4"; then + gcc_atomic_type32="short" + elif test "$ac_cv_sizeof_int" = "4"; then + gcc_atomic_type32="int" + elif test "$ac_cv_sizeof_long" = "4"; then + gcc_atomic_type32="long" + else + AC_MSG_ERROR([No 32-bit type found]) + fi + + if test "$ac_cv_sizeof_int" = "8"; then + gcc_atomic_type64="int" + elif test "$ac_cv_sizeof_long" = "8"; then + gcc_atomic_type64="long" + elif test "$ac_cv_sizeof_long_long" = "8"; then + gcc_atomic_type64="long long" + else + AC_MSG_ERROR([No 64-bit type found]) + fi + + if test "$ac_cv_sizeof___int128_t" = "16"; then + gcc_atomic_type128="__int128_t" + else + gcc_atomic_type128="#error " + fi + AC_CACHE_CHECK([for a working __sync_synchronize()], ethr_cv___sync_synchronize, + [ + ethr_cv___sync_synchronize=no + AC_TRY_LINK([], + [ __sync_synchronize(); ], + [ethr_cv___sync_synchronize=yes]) + if test $ethr_cv___sync_synchronize = yes; then + # + # Old gcc versions on at least x86 have a buggy + # __sync_synchronize() which does not emit a + # memory barrier. We try to detect this by + # compiling to assembly with and without + # __sync_synchronize() and compare the results. + # + ETHR_CHK_IF_NOOP(__sync_synchronize, [()], []) + if test $ethr___sync_synchronize_noop = yes; then + # Got a buggy implementation of + # __sync_synchronize... + ethr_cv___sync_synchronize="no; buggy implementation" + fi + fi + ]) + + if test "$ethr_cv___sync_synchronize" = "yes"; then + have_sync_synchronize_value="~0" + else + have_sync_synchronize_value="0" + fi + AC_DEFINE_UNQUOTED([ETHR_HAVE___sync_synchronize], [$have_sync_synchronize_value], [Define as a bitmask corresponding to the word sizes that __sync_synchronize() can handle on your system]) + + ETHR_CHK_GCC_ATOMIC_OP__(__sync_add_and_fetch) + ETHR_CHK_GCC_ATOMIC_OP__(__sync_fetch_and_and) + ETHR_CHK_GCC_ATOMIC_OP__(__sync_fetch_and_or) + ETHR_CHK_GCC_ATOMIC_OP__(__sync_val_compare_and_swap) + + ETHR_CHK_GCC_ATOMIC_OP__(__atomic_store_n) + ETHR_CHK_GCC_ATOMIC_OP__(__atomic_load_n) + ETHR_CHK_GCC_ATOMIC_OP__(__atomic_add_fetch) + ETHR_CHK_GCC_ATOMIC_OP__(__atomic_fetch_and) + ETHR_CHK_GCC_ATOMIC_OP__(__atomic_fetch_or) + ETHR_CHK_GCC_ATOMIC_OP__(__atomic_compare_exchange_n) + + ethr_have_gcc_native_atomics=no + ethr_arm_dbm_instr_val=0 + case "$GCC-$host_cpu" in + yes-arm*) + AC_CACHE_CHECK([for ARM DMB instruction], ethr_cv_arm_dbm_instr, + [ + ethr_cv_arm_dbm_instr=no + AC_TRY_LINK([], + [ + __asm__ __volatile__("dmb sy" : : : "memory"); + __asm__ __volatile__("dmb st" : : : "memory"); + ], + [ethr_cv_arm_dbm_instr=yes]) + ]) + if test $ethr_cv_arm_dbm_instr = yes; then + ethr_arm_dbm_instr_val=1 + test $ethr_cv_64bit___atomic_compare_exchange_n = yes && + ethr_have_gcc_native_atomics=yes + fi;; + *) + ;; esac - have_sync_op=no - AC_TRY_LINK([], - [ - $4 res; - volatile $4 var; - res = $sync_call - ], - [have_sync_op=yes]) - test $have_sync_op = yes && $5 - AC_MSG_RESULT([$have_sync_op]) + AC_DEFINE_UNQUOTED([ETHR_HAVE_GCC_ASM_ARM_DMB_INSTRUCTION], [$ethr_arm_dbm_instr_val], [Define as a boolean indicating whether you have a gcc compatible compiler capable of generating the ARM DMB instruction, and are compiling for an ARM processor with ARM DMB instruction support, or not]) + test $ethr_cv_32bit___sync_val_compare_and_swap = yes && + ethr_have_gcc_native_atomics=yes + test $ethr_cv_64bit___sync_val_compare_and_swap = yes && + ethr_have_gcc_native_atomics=yes + if test "$ethr_cv___sync_synchronize" = "yes"; then + test $ethr_cv_64bit___atomic_compare_exchange_n = yes && + ethr_have_gcc_native_atomics=yes + test $ethr_cv_32bit___atomic_compare_exchange_n = yes && + ethr_have_gcc_native_atomics=yes + fi + ethr_have_gcc_atomic_builtins=0 + if test $ethr_have_gcc_native_atomics = yes; then + ethr_native_atomic_implementation=gcc_sync + test $ethr_cv_32bit___atomic_compare_exchange_n = yes && ethr_have_gcc_atomic_builtins=1 + test $ethr_cv_64bit___atomic_compare_exchange_n = yes && ethr_have_gcc_atomic_builtins=1 + test $ethr_have_gcc_atomic_builtins = 1 && ethr_native_atomic_implementation=gcc_atomic_sync + fi + AC_DEFINE_UNQUOTED([ETHR_HAVE_GCC___ATOMIC_BUILTINS], [$ethr_have_gcc_atomic_builtins], [Define as a boolean indicating whether you have a gcc __atomic builtins or not]) + test $ethr_have_gcc_native_atomics = yes && ethr_have_native_atomics=yes ]) AC_DEFUN(ETHR_CHK_INTERLOCKED, @@ -1116,6 +1318,16 @@ AC_ARG_ENABLE(prefer-gcc-native-ethr-impls, test $enable_prefer_gcc_native_ethr_impls = yes && AC_DEFINE(ETHR_PREFER_GCC_NATIVE_IMPLS, 1, [Define if you prefer gcc native ethread implementations]) +AC_ARG_ENABLE(trust-gcc-atomic-builtins-memory-barriers, + AS_HELP_STRING([--enable-trust-gcc-atomic-builtins-memory-barriers], + [trust gcc atomic builtins memory barriers]), +[ case "$enableval" in + yes) trust_gcc_atomic_builtins_mbs=1 ;; + *) trust_gcc_atomic_builtins_mbs=0 ;; + esac ], trust_gcc_atomic_builtins_mbs=0) + +AC_DEFINE_UNQUOTED(ETHR_TRUST_GCC_ATOMIC_BUILTINS_MEMORY_BARRIERS, [$trust_gcc_atomic_builtins_mbs], [Define as a boolean indicating whether you trust gcc's __atomic_* builtins memory barrier implementations, or not]) + AC_ARG_WITH(libatomic_ops, AS_HELP_STRING([--with-libatomic_ops=PATH], [specify and prefer usage of libatomic_ops in the ethread library])) @@ -1147,6 +1359,7 @@ if test "x$erl_monotonic_clock_id" != "x"; then AC_DEFINE_UNQUOTED(ETHR_MONOTONIC_CLOCK_ID, [$erl_monotonic_clock_id], [Define to the monotonic clock id to use]) fi +ethr_native_atomic_implementation=none ethr_have_native_atomics=no ethr_have_native_spinlock=no ETHR_THR_LIB_BASE="$THR_LIB_NAME" @@ -1231,7 +1444,10 @@ case "$THR_LIB_NAME" in ETHR_CHK_INTERLOCKED([_InterlockedCompareExchange128], [4], [__int64], AC_DEFINE_UNQUOTED(ETHR_HAVE__INTERLOCKEDCOMPAREEXCHANGE128, 1, [Define if you have _InterlockedCompareExchange128()])) fi - test "$ethr_have_native_atomics" = "yes" && ethr_have_native_spinlock=yes + if test "$ethr_have_native_atomics" = "yes"; then + ethr_native_atomic_implementation=windows + ethr_have_native_spinlock=yes + fi ;; pthread|ose_threads) @@ -1575,54 +1791,11 @@ case "$THR_LIB_NAME" in fi ## test "x$THR_LIB_NAME" = "xpthread" - AC_CHECK_SIZEOF(int) - AC_CHECK_SIZEOF(long) - AC_CHECK_SIZEOF(long long) - AC_CHECK_SIZEOF(__int128_t) - - if test "$ac_cv_sizeof_int" = "4"; then - int32="int" - elif test "$ac_cv_sizeof_long" = "4"; then - int32="long" - elif test "$ac_cv_sizeof_long_long" = "4"; then - int32="long long" - else - AC_MSG_ERROR([No 32-bit type found]) - fi - - if test "$ac_cv_sizeof_int" = "8"; then - int64="int" - elif test "$ac_cv_sizeof_long" = "8"; then - int64="long" - elif test "$ac_cv_sizeof_long_long" = "8"; then - int64="long long" - else - AC_MSG_ERROR([No 64-bit type found]) - fi - - int128=no - if test "$ac_cv_sizeof___int128_t" = "16"; then - int128="__int128_t" - fi - if test "X$disable_native_ethr_impls" = "Xyes"; then ethr_have_native_atomics=no else - ETHR_CHK_SYNC_OP([__sync_val_compare_and_swap], [3], [32], [$int32], AC_DEFINE(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP32, 1, [Define if you have __sync_val_compare_and_swap() for 32-bit integers])) - test "$have_sync_op" = "yes" && ethr_have_native_atomics=yes - ETHR_CHK_SYNC_OP([__sync_add_and_fetch], [2], [32], [$int32], AC_DEFINE(ETHR_HAVE___SYNC_ADD_AND_FETCH32, 1, [Define if you have __sync_add_and_fetch() for 32-bit integers])) - ETHR_CHK_SYNC_OP([__sync_fetch_and_and], [2], [32], [$int32], AC_DEFINE(ETHR_HAVE___SYNC_FETCH_AND_AND32, 1, [Define if you have __sync_fetch_and_and() for 32-bit integers])) - ETHR_CHK_SYNC_OP([__sync_fetch_and_or], [2], [32], [$int32], AC_DEFINE(ETHR_HAVE___SYNC_FETCH_AND_OR32, 1, [Define if you have __sync_fetch_and_or() for 32-bit integers])) - - ETHR_CHK_SYNC_OP([__sync_val_compare_and_swap], [3], [64], [$int64], AC_DEFINE(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64, 1, [Define if you have __sync_val_compare_and_swap() for 64-bit integers])) - test "$have_sync_op" = "yes" && ethr_have_native_atomics=yes - ETHR_CHK_SYNC_OP([__sync_add_and_fetch], [2], [64], [$int64], AC_DEFINE(ETHR_HAVE___SYNC_ADD_AND_FETCH64, 1, [Define if you have __sync_add_and_fetch() for 64-bit integers])) - ETHR_CHK_SYNC_OP([__sync_fetch_and_and], [2], [64], [$int64], AC_DEFINE(ETHR_HAVE___SYNC_FETCH_AND_AND64, 1, [Define if you have __sync_fetch_and_and() for 64-bit integers])) - ETHR_CHK_SYNC_OP([__sync_fetch_and_or], [2], [64], [$int64], AC_DEFINE(ETHR_HAVE___SYNC_FETCH_AND_OR64, 1, [Define if you have __sync_fetch_and_or() for 64-bit integers])) - - if test $int128 != no; then - ETHR_CHK_SYNC_OP([__sync_val_compare_and_swap], [3], [128], [$int128], AC_DEFINE(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP128, 1, [Define if you have __sync_val_compare_and_swap() for 128-bit integers])) - fi + + ETHR_CHK_GCC_ATOMIC_OPS([]) AC_MSG_CHECKING([for a usable libatomic_ops implementation]) case "x$with_libatomic_ops" in @@ -1672,6 +1845,7 @@ case "$THR_LIB_NAME" in #endif ], [ethr_have_native_atomics=yes + ethr_native_atomic_implementation=libatomic_ops ethr_have_libatomic_ops=yes]) AC_MSG_RESULT([$ethr_have_libatomic_ops]) if test $ethr_have_libatomic_ops = yes; then @@ -1703,15 +1877,19 @@ case "$THR_LIB_NAME" in *) AC_MSG_ERROR([Unsupported Sparc memory order: $with_sparc_memory_order]);; esac + ethr_native_atomic_implementation=ethread ethr_have_native_atomics=yes;; i86pc | i*86 | x86_64 | amd64) if test "$enable_x86_out_of_order" = "yes"; then AC_DEFINE(ETHR_X86_OUT_OF_ORDER, 1, [Define if x86/x86_64 out of order instructions should be synchronized]) fi + ethr_native_atomic_implementation=ethread ethr_have_native_atomics=yes;; macppc | ppc | powerpc | "Power Macintosh") + ethr_native_atomic_implementation=ethread ethr_have_native_atomics=yes;; tile) + ethr_native_atomic_implementation=ethread ethr_have_native_atomics=yes;; *) ;; diff --git a/erts/configure.in b/erts/configure.in index f248cbe34d..481dfe405e 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -2,7 +2,7 @@ dnl Process this file with autoconf to produce a configure script. -*-m4-*- dnl %CopyrightBegin% dnl -dnl Copyright Ericsson AB 1997-2014. All Rights Reserved. +dnl Copyright Ericsson AB 1997-2015. All Rights Reserved. dnl dnl The contents of this file are subject to the Erlang Public License, dnl Version 1.1, (the "License"); you may not use this file except in @@ -1105,10 +1105,31 @@ if test $ERTS_BUILD_SMP_EMU = yes; then case "$ethr_have_native_atomics-$smp_require_native_atomics-$ethr_have_native_spinlock" in yes-*) + if test "$ethr_native_atomic_implementation" = "gcc_sync"; then + test -f "$ERL_TOP/erts/CONF_INFO" || + echo "" > "$ERL_TOP/erts/CONF_INFO" + cat >> $ERL_TOP/erts/CONF_INFO <<EOF + + WARNING: + Only gcc's __sync_* builtins available for + atomic memory access. This will cause lots + of expensive and unnecessary memory barrier + instructions to be issued which will make + the performance of the runtime system + suffer. You are *strongly* advised to + upgrade to a gcc version that supports the + __atomic_* builtins (at least gcc version + 4.7) or build with libatomic_ops. See the + "Atomic Memory Operations and the VM" + chapter of \$ERL_TOP/HOWTO/INSTALL.md for + more information. + +EOF + fi ;; no-yes-*) - AC_MSG_ERROR([No native atomic implementation found. See Configuring section in INSTALL.md for more information.]) + AC_MSG_ERROR([No native atomic implementation found. See the \"Atomic Memory Operations and the VM\" chapter of \$ERL_TOP/HOWTO/INSTALL.md for more information.]) ;; no-no-yes) diff --git a/erts/include/internal/ethread.h b/erts/include/internal/ethread.h index 0d9a4a4305..e598017ada 100644 --- a/erts/include/internal/ethread.h +++ b/erts/include/internal/ethread.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2004-2013. All Rights Reserved. + * Copyright Ericsson AB 2004-2015. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -362,6 +362,9 @@ extern ethr_runtime_t ethr_runtime__; # include "sparc64/ethread.h" # endif # endif +# if ETHR_HAVE_GCC___ATOMIC_BUILTINS +# include "gcc/ethread.h" +# endif # include "libatomic_ops/ethread.h" # include "gcc/ethread.h" # endif diff --git a/erts/include/internal/ethread_header_config.h.in b/erts/include/internal/ethread_header_config.h.in index 618fcba380..a9727568a2 100644 --- a/erts/include/internal/ethread_header_config.h.in +++ b/erts/include/internal/ethread_header_config.h.in @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2004-2011. All Rights Reserved. + * Copyright Ericsson AB 2004-2015. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -84,32 +84,62 @@ /* Define if run in Sparc RMO, PSO, or TSO mode */ #undef ETHR_SPARC_RMO -/* Define if you have __sync_add_and_fetch() for 32-bit integers */ -#undef ETHR_HAVE___SYNC_ADD_AND_FETCH32 +/* Define as a boolean indicating whether you have a gcc compatible compiler + capable of generating the ARM DMB instruction, and are compiling for an ARM + processor with ARM DMB instruction support, or not */ +#undef ETHR_HAVE_GCC_ASM_ARM_DMB_INSTRUCTION -/* Define if you have __sync_add_and_fetch() for 64-bit integers */ -#undef ETHR_HAVE___SYNC_ADD_AND_FETCH64 +/* Define as a bitmask corresponding to the word sizes that + __sync_synchronize() can handle on your system */ +#undef ETHR_HAVE___sync_synchronize -/* Define if you have __sync_fetch_and_and() for 32-bit integers */ -#undef ETHR_HAVE___SYNC_FETCH_AND_AND32 +/* Define as a bitmask corresponding to the word sizes that + __sync_add_and_fetch() can handle on your system */ +#undef ETHR_HAVE___sync_add_and_fetch -/* Define if you have __sync_fetch_and_and() for 64-bit integers */ -#undef ETHR_HAVE___SYNC_FETCH_AND_AND64 +/* Define as a bitmask corresponding to the word sizes that + __sync_fetch_and_and() can handle on your system */ +#undef ETHR_HAVE___sync_fetch_and_and -/* Define if you have __sync_fetch_and_or() for 32-bit integers */ -#undef ETHR_HAVE___SYNC_FETCH_AND_OR32 +/* Define as a bitmask corresponding to the word sizes that + __sync_fetch_and_or() can handle on your system */ +#undef ETHR_HAVE___sync_fetch_and_or -/* Define if you have __sync_fetch_and_or() for 64-bit integers */ -#undef ETHR_HAVE___SYNC_FETCH_AND_OR64 +/* Define as a bitmask corresponding to the word sizes that + __sync_val_compare_and_swap() can handle on your system */ +#undef ETHR_HAVE___sync_val_compare_and_swap -/* Define if you have __sync_val_compare_and_swap() for 32-bit integers */ -#undef ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP32 +/* Define as a boolean indicating whether you have a gcc __atomic builtins or + not */ +#undef ETHR_HAVE_GCC___ATOMIC_BUILTINS -/* Define if you have __sync_val_compare_and_swap() for 64-bit integers */ -#undef ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64 +/* Define as a boolean indicating whether you trust gcc's __atomic_* builtins + memory barrier implementations, or not */ +#undef ETHR_TRUST_GCC_ATOMIC_BUILTINS_MEMORY_BARRIERS -/* Define if you have __sync_val_compare_and_swap() for 128-bit integers */ -#undef ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP128 +/* Define as a bitmask corresponding to the word sizes that __atomic_store_n() + can handle on your system */ +#undef ETHR_HAVE___atomic_store_n + +/* Define as a bitmask corresponding to the word sizes that __atomic_load_n() + can handle on your system */ +#undef ETHR_HAVE___atomic_load_n + +/* Define as a bitmask corresponding to the word sizes that + __atomic_add_fetch() can handle on your system */ +#undef ETHR_HAVE___atomic_add_fetch + +/* Define as a bitmask corresponding to the word sizes that + __atomic_fetch_and() can handle on your system */ +#undef ETHR_HAVE___atomic_fetch_and + +/* Define as a bitmask corresponding to the word sizes that + __atomic_fetch_or() can handle on your system */ +#undef ETHR_HAVE___atomic_fetch_or + +/* Define as a bitmask corresponding to the word sizes that + __atomic_compare_exchange_n() can handle on your system */ +#undef ETHR_HAVE___atomic_compare_exchange_n /* Define if you prefer gcc native ethread implementations */ #undef ETHR_PREFER_GCC_NATIVE_IMPLS diff --git a/erts/include/internal/ethread_inline.h b/erts/include/internal/ethread_inline.h index ffb756c84f..c09a67619a 100644 --- a/erts/include/internal/ethread_inline.h +++ b/erts/include/internal/ethread_inline.h @@ -20,6 +20,29 @@ #ifndef ETHREAD_INLINE_H__ #define ETHREAD_INLINE_H__ +#define ETHR_GCC_COMPILER_FALSE 0 /* Not a gcc compatible compiler */ +#define ETHR_GCC_COMPILER_TRUE 1 /* The GNU gcc compiler */ +/* Negative integers for gcc compatible compilers */ +#define ETHR_GCC_COMPILER_CLANG -1 /* The Clang gcc compatible compiler */ +#define ETHR_GCC_COMPILER_ICC -2 /* The Intel gcc compatible compiler */ +/* Line them up... */ + +/* + * Unfortunately there is no easy and certain way of + * detecting a true gcc compiler, since the compatible + * ones all define the same defines as the true gnu-gcc... + */ +#if !defined(__GNUC__) && !defined(__GNUG__) +# define ETHR_GCC_COMPILER ETHR_GCC_COMPILER_FALSE +#elif defined(__clang__) +# define ETHR_GCC_COMPILER ETHR_GCC_COMPILER_CLANG +#elif defined(__ICC) || defined(__INTEL_COMPILER) +# define ETHR_GCC_COMPILER ETHR_GCC_COMPILER_ICC +#else +/* Seems to be the true gnu-gcc... */ +# define ETHR_GCC_COMPILER ETHR_GCC_COMPILER_TRUE +#endif + #if !defined(__GNUC__) # define ETHR_AT_LEAST_GCC_VSN__(MAJ, MIN, PL) 0 #elif !defined(__GNUC_MINOR__) diff --git a/erts/include/internal/gcc/ethr_atomic.h b/erts/include/internal/gcc/ethr_atomic.h index f598f8537b..62eed78f76 100644 --- a/erts/include/internal/gcc/ethr_atomic.h +++ b/erts/include/internal/gcc/ethr_atomic.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2010-2011. All Rights Reserved. + * Copyright Ericsson AB 2010-2015. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -18,78 +18,81 @@ */ /* - * Description: Native atomics ethread support using gcc's builtins + * Description: Native atomics ethread support using gcc's __atomic + * and __sync builtins * Author: Rickard Green + * + * Note: The C11 memory model implemented by gcc's __atomic + * builtins does not match the ethread API very well. + * + * Due to this we cannot use the __ATOMIC_SEQ_CST + * memory model. For more information see the comment + * in the begining of ethr_membar.h in this directory. */ #undef ETHR_INCLUDE_ATOMIC_IMPL__ -#if !defined(ETHR_GCC_ATOMIC32_H__) && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__) -#define ETHR_GCC_ATOMIC32_H__ -#if defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP32) -# define ETHR_INCLUDE_ATOMIC_IMPL__ 4 -#endif +#if !defined(ETHR_GCC_ATOMIC_ATOMIC32_H__) \ + && defined(ETHR_ATOMIC_WANT_32BIT_IMPL__) \ + && ((ETHR_HAVE___sync_val_compare_and_swap & 4) \ + || (ETHR_HAVE___atomic_compare_exchange_n & 4)) + +#define ETHR_GCC_ATOMIC_ATOMIC32_H__ +#define ETHR_INCLUDE_ATOMIC_IMPL__ 4 #undef ETHR_ATOMIC_WANT_32BIT_IMPL__ -#elif !defined(ETHR_GCC_ATOMIC64_H__) && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__) -#define ETHR_GCC_ATOMIC64_H__ -#if defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64) -# define ETHR_INCLUDE_ATOMIC_IMPL__ 8 -#endif -#undef ETHR_ATOMIC_WANT_64BIT_IMPL__ -#endif -#ifdef ETHR_INCLUDE_ATOMIC_IMPL__ +#elif !defined(ETHR_GCC_ATOMIC64_H__) \ + && defined(ETHR_ATOMIC_WANT_64BIT_IMPL__) \ + && ((ETHR_HAVE___sync_val_compare_and_swap & 8) \ + || (ETHR_HAVE___atomic_compare_exchange_n & 8)) -#ifndef ETHR_GCC_ATOMIC_COMMON__ -#define ETHR_GCC_ATOMIC_COMMON__ +#define ETHR_GCC_ATOMIC64_H__ +#define ETHR_INCLUDE_ATOMIC_IMPL__ 8 +#undef ETHR_ATOMIC_WANT_64BIT_IMPL__ -#define ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ 0 -#if defined(__i386__) || defined(__x86_64__) || defined(__sparc__) \ - || defined(__powerpc__) || defined(__ppc__) || defined(__mips__) -# undef ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ -# define ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ 1 #endif -#endif /* ETHR_GCC_ATOMIC_COMMON__ */ +#ifdef ETHR_INCLUDE_ATOMIC_IMPL__ #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 #define ETHR_HAVE_NATIVE_ATOMIC32 1 -#define ETHR_NATIVE_ATOMIC32_IMPL "gcc" #define ETHR_NATMC_FUNC__(X) ethr_native_atomic32_ ## X #define ETHR_ATMC_T__ ethr_native_atomic32_t #define ETHR_AINT_T__ ethr_sint32_t -#if defined(ETHR_HAVE___SYNC_ADD_AND_FETCH32) -# define ETHR_HAVE___SYNC_ADD_AND_FETCH -#endif -#if defined(ETHR_HAVE___SYNC_FETCH_AND_AND32) -# define ETHR_HAVE___SYNC_FETCH_AND_AND -#endif -#if defined(ETHR_HAVE___SYNC_FETCH_AND_OR32) -# define ETHR_HAVE___SYNC_FETCH_AND_OR +#if ((ETHR_HAVE___sync_val_compare_and_swap & 4) \ + && (ETHR_HAVE___atomic_compare_exchange_n & 4)) +# define ETHR_NATIVE_ATOMIC32_IMPL "gcc_atomic_and_sync_builtins" +#elif (ETHR_HAVE___atomic_compare_exchange_n & 4) +# define ETHR_NATIVE_ATOMIC32_IMPL "gcc_atomic_builtins" +#elif (ETHR_HAVE___sync_val_compare_and_swap & 4) +# define ETHR_NATIVE_ATOMIC32_IMPL "gcc_sync_builtins" +#else +# error "!?" #endif #elif ETHR_INCLUDE_ATOMIC_IMPL__ == 8 #define ETHR_HAVE_NATIVE_ATOMIC64 1 -#define ETHR_NATIVE_ATOMIC64_IMPL "gcc" #define ETHR_NATMC_FUNC__(X) ethr_native_atomic64_ ## X #define ETHR_ATMC_T__ ethr_native_atomic64_t #define ETHR_AINT_T__ ethr_sint64_t -#if defined(ETHR_HAVE___SYNC_ADD_AND_FETCH64) -# define ETHR_HAVE___SYNC_ADD_AND_FETCH -#endif -#if defined(ETHR_HAVE___SYNC_FETCH_AND_AND64) -# define ETHR_HAVE___SYNC_FETCH_AND_AND -#endif -#if defined(ETHR_HAVE___SYNC_FETCH_AND_OR64) -# define ETHR_HAVE___SYNC_FETCH_AND_OR +#if ((ETHR_HAVE___sync_val_compare_and_swap & 8) \ + && (ETHR_HAVE___atomic_compare_exchange_n & 8)) +# define ETHR_NATIVE_ATOMIC64_IMPL "gcc_atomic_and_sync_builtins" +#elif (ETHR_HAVE___atomic_compare_exchange_n & 8) +# define ETHR_NATIVE_ATOMIC64_IMPL "gcc_atomic_builtins" +#elif (ETHR_HAVE___sync_val_compare_and_swap & 8) +# define ETHR_NATIVE_ATOMIC64_IMPL "gcc_sync_builtins" +#else +# error "!?" #endif #else #error "Unsupported integer size" #endif +#undef ETHR_NATIVE_ATOMIC_IMPL__ + typedef struct { - volatile ETHR_AINT_T__ counter; + volatile ETHR_AINT_T__ value; } ETHR_ATMC_T__; - #if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__) #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 @@ -98,13 +101,19 @@ typedef struct { # define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADDR 1 #endif + static ETHR_INLINE ETHR_AINT_T__ * ETHR_NATMC_FUNC__(addr)(ETHR_ATMC_T__ *var) { - return (ETHR_AINT_T__ *) &var->counter; + return (ETHR_AINT_T__ *) &var->value; } -#if ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ +/* + * set() + */ +#if (ETHR_HAVE___atomic_store_n & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if (ETHR_GCC_RELAXED_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 # define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET 1 @@ -115,12 +124,109 @@ ETHR_NATMC_FUNC__(addr)(ETHR_ATMC_T__ *var) static ETHR_INLINE void ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ value) { - var->counter = value; + __atomic_store_n(&var->value, value, __ATOMIC_RELAXED); } -#endif /* ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ */ +#endif /* ETHR_GCC_RELAXED_VERSIONS__ */ -#if ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ +#if (ETHR_GCC_RELB_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_RELB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_RELB 1 +#endif + +static ETHR_INLINE void +ETHR_NATMC_FUNC__(set_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ value) +{ + __atomic_store_n(&var->value, value, __ATOMIC_RELEASE); +} + +#endif /* ETHR_GCC_RELB_VERSIONS__ */ + +#elif (ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if (ETHR_GCC_RELAXED_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET 1 +#endif + +static ETHR_INLINE void +ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ value) +{ + var->value = value; +} + +#endif /* ETHR_GCC_RELAXED_VERSIONS__ */ + +#if (ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if (ETHR_GCC_RELB_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET_RELB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_SET_RELB 1 +#endif + +static ETHR_INLINE void +ETHR_NATMC_FUNC__(set_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ value) +{ + var->value = value; +} + +#endif /* ETHR_GCC_RELB_VERSIONS__ */ + +#endif /* ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ */ + +#endif /* ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ */ + +/* + * read() + */ + +#if (ETHR_HAVE___atomic_load_n & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if (ETHR_GCC_RELAXED_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_READ 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var) +{ + return __atomic_load_n(&var->value, __ATOMIC_RELAXED); +} + +#endif /* ETHR_GCC_RELAXED_VERSIONS__ */ + +#if ((ETHR_GCC_ACQB_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) \ + & ~ETHR___atomic_load_ACQUIRE_barrier_bug) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ_ACQB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_READ_ACQB 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(read_acqb)(ETHR_ATMC_T__ *var) +{ + return __atomic_load_n(&var->value, __ATOMIC_ACQUIRE); +} + +#endif /* ETHR_GCC_ACQB_VERSIONS__ */ + +#elif (ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if (ETHR_GCC_RELAXED_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 # define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ 1 @@ -131,12 +237,90 @@ ETHR_NATMC_FUNC__(set)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ value) static ETHR_INLINE ETHR_AINT_T__ ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var) { - return var->counter; + return var->value; +} + +#endif /* ETHR_GCC_RELAXED_VERSIONS__ */ + +#if (ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if (ETHR_GCC_ACQB_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ_ACQB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_READ_ACQB 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(read_acqb)(ETHR_ATMC_T__ *var) +{ + return var->value; +} + +#endif /* ETHR_GCC_ACQB_VERSIONS__ */ + +#endif /* ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ */ + +#endif /* ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ */ + +/* + * add_return() + */ +#if (ETHR_HAVE___atomic_add_fetch & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if (ETHR_GCC_RELAXED_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_RETURN 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(add_return)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr) +{ + return __atomic_add_fetch(&var->value, incr, __ATOMIC_RELAXED); } -#endif /* ETHR_READ_AND_SET_WITHOUT_SYNC_OP__ */ +#endif /* ETHR_GCC_RELAXED_MOD_VERSIONS__ */ -#if defined(ETHR_HAVE___SYNC_ADD_AND_FETCH) +#if (ETHR_GCC_ACQB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_ACQB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_RETURN_ACQB 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(add_return_acqb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr) +{ + return __atomic_add_fetch(&var->value, incr, __ATOMIC_ACQUIRE); +} + +#endif /* ETHR_GCC_ACQB_MOD_VERSIONS__ */ + +#if (ETHR_GCC_RELB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_RELB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_ADD_RETURN_RELB 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(add_return_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr) +{ + return __atomic_add_fetch(&var->value, incr, __ATOMIC_RELEASE); +} + +#endif /* ETHR_GCC_RELB_MOD_VERSIONS__ */ + +#endif /* ETHR_HAVE___atomic_add_fetch */ + +#if ((ETHR_HAVE___sync_add_and_fetch & ETHR_INCLUDE_ATOMIC_IMPL__) \ + & ETHR_GCC_MB_MOD_VERSIONS__) #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 # define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_MB 1 @@ -147,12 +331,68 @@ ETHR_NATMC_FUNC__(read)(ETHR_ATMC_T__ *var) static ETHR_INLINE ETHR_AINT_T__ ETHR_NATMC_FUNC__(add_return_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr) { - return __sync_add_and_fetch(&var->counter, incr); + return __sync_add_and_fetch(&var->value, incr); +} + +#endif /* ETHR_HAVE___sync_add_and_fetch */ + +/* + * and_retold() + */ +#if (ETHR_HAVE___atomic_fetch_and & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if (ETHR_GCC_RELAXED_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_AND_RETOLD 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_AND_RETOLD 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(and_retold)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) +{ + return __atomic_fetch_and(&var->value, mask, __ATOMIC_RELAXED); +} + +#endif /* ETHR_GCC_RELAXED_MOD_VERSIONS__ */ + +#if (ETHR_GCC_ACQB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_AND_RETOLD_ACQB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_AND_RETOLD_ACQB 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(and_retold_acqb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) +{ + return __atomic_fetch_and(&var->value, mask, __ATOMIC_ACQUIRE); } +#endif /* ETHR_GCC_ACQB_MOD_VERSIONS__ */ + +#if (ETHR_GCC_RELB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_AND_RETOLD_RELB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_AND_RETOLD_RELB 1 #endif -#if defined(ETHR_HAVE___SYNC_FETCH_AND_AND) +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(and_retold_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) +{ + return __atomic_fetch_and(&var->value, mask, __ATOMIC_RELEASE); +} + +#endif /* ETHR_GCC_RELB_MOD_VERSIONS__ */ + +#endif /* ETHR_HAVE___atomic_fetch_and */ + +#if ((ETHR_HAVE___sync_fetch_and_and & ETHR_INCLUDE_ATOMIC_IMPL__) \ + & ETHR_GCC_MB_MOD_VERSIONS__) #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 # define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_AND_RETOLD_MB 1 @@ -163,12 +403,68 @@ ETHR_NATMC_FUNC__(add_return_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ incr) static ETHR_INLINE ETHR_AINT_T__ ETHR_NATMC_FUNC__(and_retold_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) { - return __sync_fetch_and_and(&var->counter, mask); + return __sync_fetch_and_and(&var->value, mask); +} + +#endif /* ETHR_HAVE___sync_fetch_and_and */ + +/* + * or_retold() + */ +#if (ETHR_HAVE___atomic_fetch_or & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if (ETHR_GCC_RELAXED_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_OR_RETOLD 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_OR_RETOLD 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(or_retold)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) +{ + return __atomic_fetch_or(&var->value, mask, __ATOMIC_RELAXED); +} + +#endif /* ETHR_GCC_RELAXED_MOD_VERSIONS__ */ + +#if (ETHR_GCC_ACQB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_OR_RETOLD_ACQB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_OR_RETOLD_ACQB 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(or_retold_acqb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) +{ + return __atomic_fetch_or(&var->value, mask, __ATOMIC_ACQUIRE); } +#endif /* ETHR_GCC_ACQB_MOD_VERSIONS__ */ + +#if (ETHR_GCC_RELB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_OR_RETOLD_RELB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_OR_RETOLD_RELB 1 #endif -#if defined(ETHR_HAVE___SYNC_FETCH_AND_OR) +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(or_retold_relb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) +{ + return __atomic_fetch_or(&var->value, mask, __ATOMIC_RELEASE); +} + +#endif /* ETHR_GCC_RELB_MOD_VERSIONS__ */ + +#endif /* ETHR_HAVE___atomic_fetch_or */ + +#if ((ETHR_HAVE___sync_fetch_and_or & ETHR_INCLUDE_ATOMIC_IMPL__) \ + & ETHR_GCC_MB_MOD_VERSIONS__) #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 # define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_OR_RETOLD_MB 1 @@ -179,11 +475,73 @@ ETHR_NATMC_FUNC__(and_retold_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) static ETHR_INLINE ETHR_AINT_T__ ETHR_NATMC_FUNC__(or_retold_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ mask) { - return (ETHR_AINT_T__) __sync_fetch_and_or(&var->counter, mask); + return (ETHR_AINT_T__) __sync_fetch_and_or(&var->value, mask); +} + +#endif /* ETHR_HAVE___sync_fetch_and_or */ + +/* + * cmpxchg() + */ +#if (ETHR_HAVE___atomic_compare_exchange_n & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if (ETHR_GCC_RELAXED_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_CMPXCHG 1 +#endif + +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(cmpxchg)(ETHR_ATMC_T__ *var, + ETHR_AINT_T__ new, + ETHR_AINT_T__ exp) +{ + ETHR_AINT_T__ xchg = exp; + if (__atomic_compare_exchange_n(&var->value, + &xchg, + new, + 0, /* No spurious failures, please */ + __ATOMIC_RELAXED, + __ATOMIC_RELAXED)) + return exp; + return xchg; } +#endif /* ETHR_GCC_RELAXED_MOD_VERSIONS__ */ + +#if (ETHR_GCC_ACQB_MOD_VERSIONS__ & ETHR_INCLUDE_ATOMIC_IMPL__) + +#if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG_ACQB 1 +#else +# define ETHR_HAVE_ETHR_NATIVE_ATOMIC64_CMPXCHG_ACQB 1 #endif +static ETHR_INLINE ETHR_AINT_T__ +ETHR_NATMC_FUNC__(cmpxchg_acqb)(ETHR_ATMC_T__ *var, + ETHR_AINT_T__ new, + ETHR_AINT_T__ exp) +{ + ETHR_AINT_T__ xchg = exp; + if (__atomic_compare_exchange_n(&var->value, + &xchg, + new, + 0, /* No spurious failures, please */ + __ATOMIC_ACQUIRE, + __ATOMIC_ACQUIRE)) + return exp; + return xchg; +} + +#endif /* ETHR_GCC_ACQB_MOD_VERSIONS__ */ + +#endif /* ETHR_HAVE___atomic_compare_exchange_n */ + +#if ((ETHR_HAVE___sync_val_compare_and_swap & ETHR_INCLUDE_ATOMIC_IMPL__) \ + & ETHR_GCC_MB_MOD_VERSIONS__) + #if ETHR_INCLUDE_ATOMIC_IMPL__ == 4 # define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG_MB 1 #else @@ -195,17 +553,16 @@ ETHR_NATMC_FUNC__(cmpxchg_mb)(ETHR_ATMC_T__ *var, ETHR_AINT_T__ new, ETHR_AINT_T__ old) { - return __sync_val_compare_and_swap(&var->counter, old, new); + return __sync_val_compare_and_swap(&var->value, old, new); } +#endif /* ETHR_HAVE___sync_val_compare_and_swap */ + #endif /* ETHR_TRY_INLINE_FUNCS */ #undef ETHR_NATMC_FUNC__ #undef ETHR_ATMC_T__ #undef ETHR_AINT_T__ #undef ETHR_AINT_SUFFIX__ -#undef ETHR_HAVE___SYNC_ADD_AND_FETCH -#undef ETHR_HAVE___SYNC_FETCH_AND_AND -#undef ETHR_HAVE___SYNC_FETCH_AND_OR #endif diff --git a/erts/include/internal/gcc/ethr_dw_atomic.h b/erts/include/internal/gcc/ethr_dw_atomic.h index 6736f9c547..c2c8f85b7b 100644 --- a/erts/include/internal/gcc/ethr_dw_atomic.h +++ b/erts/include/internal/gcc/ethr_dw_atomic.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2011. All Rights Reserved. + * Copyright Ericsson AB 2011-2015. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -18,35 +18,39 @@ */ /* - * Description: Native double word atomics using gcc's builtins + * Description: Native double word atomics using gcc's __atomic + * and __sync builtins * Author: Rickard Green + * + * Note: The C11 memory model implemented by gcc's __atomic + * builtins does not match the ethread API very well. + * + * Due to this we cannot use the __ATOMIC_SEQ_CST + * memory model. For more information see the comment + * in the begining of ethr_membar.h in this directory. */ #undef ETHR_INCLUDE_DW_ATOMIC_IMPL__ -#ifndef ETHR_GCC_DW_ATOMIC_H__ -# define ETHR_GCC_DW_ATOMIC_H__ -# if ((ETHR_SIZEOF_PTR == 4 \ - && defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64)) \ - || (ETHR_SIZEOF_PTR == 8 \ - && defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP128) \ - && defined(ETHR_HAVE_INT128_T))) -# define ETHR_INCLUDE_DW_ATOMIC_IMPL__ -# endif +#if !defined(ETHR_GCC_ATOMIC_DW_ATOMIC_H__) \ + && ((ETHR_HAVE___sync_val_compare_and_swap & (2*ETHR_SIZEOF_PTR)) \ + || (ETHR_HAVE___atomic_compare_exchange_n & (2*ETHR_SIZEOF_PTR))) +# define ETHR_GCC_ATOMIC_DW_ATOMIC_H__ +# define ETHR_INCLUDE_DW_ATOMIC_IMPL__ #endif #ifdef ETHR_INCLUDE_DW_ATOMIC_IMPL__ # define ETHR_HAVE_NATIVE_SU_DW_ATOMIC -# define ETHR_NATIVE_DW_ATOMIC_IMPL "gcc" -# if defined(__i386__) || defined(__x86_64__) -/* - * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it will be used - * at runtime in order to determine if native or fallback implementation - * should be used. - */ -# define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \ - ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__ -# endif +#if ((ETHR_HAVE___sync_val_compare_and_swap & (2*ETHR_SIZEOF_PTR)) \ + && (ETHR_HAVE___atomic_compare_exchange_n & (2*ETHR_SIZEOF_PTR))) +# define ETHR_NATIVE_DW_ATOMIC_IMPL "gcc_atomic_and_sync_builtins" +#elif (ETHR_HAVE___atomic_compare_exchange_n & (2*ETHR_SIZEOF_PTR)) +# define ETHR_NATIVE_DW_ATOMIC_IMPL "gcc_atomic_builtins" +#elif (ETHR_HAVE___sync_val_compare_and_swap & (2*ETHR_SIZEOF_PTR)) +# define ETHR_NATIVE_DW_ATOMIC_IMPL "gcc_sync_builtins" +#else +# error "!?" +#endif # if ETHR_SIZEOF_PTR == 4 # define ETHR_DW_NATMC_ALIGN_MASK__ 0x7 @@ -89,15 +93,138 @@ typedef union { # define ETHR_DW_DBG_ALIGNED__(PTR) # endif -#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR + +#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR 1 + static ETHR_INLINE ethr_sint_t * ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var) { return (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var); } +#if (ETHR_HAVE___atomic_store_n & (2*ETHR_SIZEOF_PTR)) + +#if (ETHR_GCC_RELAXED_VERSIONS__ & (2*ETHR_SIZEOF_PTR)) + +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET 1 + +static ETHR_INLINE void +ethr_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var, + ETHR_NATIVE_SU_DW_SINT_T value) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + ETHR_DW_DBG_ALIGNED__(p); + __atomic_store_n(p, value, __ATOMIC_RELAXED); +} + +#endif /* ETHR_GCC_RELAXED_VERSIONS__ */ + +#if (ETHR_GCC_RELB_VERSIONS__ & (2*ETHR_SIZEOF_PTR)) -#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_CMPXCHG_MB +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET_RELB 1 + +static ETHR_INLINE void +ethr_native_su_dw_atomic_set_relb(ethr_native_dw_atomic_t *var, + ETHR_NATIVE_SU_DW_SINT_T value) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + ETHR_DW_DBG_ALIGNED__(p); + __atomic_store_n(p, value, __ATOMIC_RELEASE); +} + +#endif /* ETHR_GCC_RELB_VERSIONS__ */ + +#endif /* ETHR_HAVE___atomic_store_n */ + +#if (ETHR_HAVE___atomic_load_n & (2*ETHR_SIZEOF_PTR)) + +#if (ETHR_GCC_RELAXED_VERSIONS__ & (2*ETHR_SIZEOF_PTR)) + +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ 1 + +static ETHR_INLINE ETHR_NATIVE_SU_DW_SINT_T +ethr_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + ETHR_DW_DBG_ALIGNED__(p); + return __atomic_load_n(p, __ATOMIC_RELAXED); +} + +#endif /* ETHR_GCC_RELAXED_VERSIONS__ */ + +#if ((ETHR_GCC_ACQB_VERSIONS__ & (2*ETHR_SIZEOF_PTR)) \ + & ~ETHR___atomic_load_ACQUIRE_barrier_bug) + +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ_ACQB 1 + +static ETHR_INLINE ETHR_NATIVE_SU_DW_SINT_T +ethr_native_su_dw_atomic_read_acqb(ethr_native_dw_atomic_t *var) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + ETHR_DW_DBG_ALIGNED__(p); + return __atomic_load_n(p, __ATOMIC_ACQUIRE); +} + +#endif /* ETHR_GCC_ACQB_VERSIONS__ */ + +#endif /* ETHR_HAVE___atomic_load_n */ + +#if (ETHR_HAVE___atomic_compare_exchange_n & (2*ETHR_SIZEOF_PTR)) + +#if (ETHR_GCC_RELAXED_MOD_VERSIONS__ & (2*ETHR_SIZEOF_PTR)) + +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_CMPXCHG 1 + +static ETHR_INLINE ETHR_NATIVE_SU_DW_SINT_T +ethr_native_su_dw_atomic_cmpxchg(ethr_native_dw_atomic_t *var, + ETHR_NATIVE_SU_DW_SINT_T new, + ETHR_NATIVE_SU_DW_SINT_T exp) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + ETHR_NATIVE_SU_DW_SINT_T xchg = exp; + ETHR_DW_DBG_ALIGNED__(p); + if (__atomic_compare_exchange_n(p, + &xchg, + new, + 0, + __ATOMIC_RELAXED, + __ATOMIC_RELAXED)) + return exp; + return xchg; +} + +#endif /* ETHR_GCC_RELAXED_MOD_VERSIONS__ */ + +#if (ETHR_GCC_ACQB_MOD_VERSIONS__ & (2*ETHR_SIZEOF_PTR)) + +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_CMPXCHG_ACQB 1 + +static ETHR_INLINE ETHR_NATIVE_SU_DW_SINT_T +ethr_native_su_dw_atomic_cmpxchg_acqb(ethr_native_dw_atomic_t *var, + ETHR_NATIVE_SU_DW_SINT_T new, + ETHR_NATIVE_SU_DW_SINT_T exp) +{ + ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var); + ETHR_NATIVE_SU_DW_SINT_T xchg = exp; + ETHR_DW_DBG_ALIGNED__(p); + if (__atomic_compare_exchange_n(p, + &xchg, + new, + 0, + __ATOMIC_ACQUIRE, + __ATOMIC_ACQUIRE)) + return exp; + return xchg; +} + +#endif /* ETHR_GCC_ACQB_MOD_VERSIONS__ */ + +#endif /* ETHR_HAVE___atomic_compare_exchange_n */ + +#if ((ETHR_HAVE___sync_val_compare_and_swap & (2*ETHR_SIZEOF_PTR)) \ + & ETHR_GCC_MB_MOD_VERSIONS__) + +#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_CMPXCHG_MB 1 static ETHR_INLINE ETHR_NATIVE_SU_DW_SINT_T ethr_native_su_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var, @@ -109,7 +236,8 @@ ethr_native_su_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var, return __sync_val_compare_and_swap(p, old, new); } -#endif /* ETHR_TRY_INLINE_FUNCS */ +#endif /* ETHR_HAVE___sync_val_compare_and_swap */ -#endif /* ETHR_GCC_DW_ATOMIC_H__ */ +#endif /* ETHR_TRY_INLINE_FUNCS */ +#endif /* ETHR_INCLUDE_DW_ATOMIC_IMPL__ */ diff --git a/erts/include/internal/gcc/ethr_membar.h b/erts/include/internal/gcc/ethr_membar.h index 7d428fc68e..d2d36907f3 100644 --- a/erts/include/internal/gcc/ethr_membar.h +++ b/erts/include/internal/gcc/ethr_membar.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2011. All Rights Reserved. + * Copyright Ericsson AB 2011-2015. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -18,56 +18,196 @@ */ /* - * Description: Memory barriers when using gcc's builtins + * Description: Memory barriers when using gcc's __atomic and + * __sync builtins * Author: Rickard Green + * + * Note: The C11 memory model implemented by gcc's __atomic + * builtins does not match the ethread API very well. + * + * A function with a barrier postfix in the ethread atomic + * API needs to ensure that all stores and loads are + * ordered around it according to the semantics of the + * barrier specified. + * + * The C11 aproch is different. The __atomic builtins + * API takes a memory model parameter. Assuming that all + * memory syncronizations using the involved atomic + * variables are made using this API, the synchronizations + * will adhere to the memory models used. That is, you do + * *not* know how loads and stores will be ordered around + * a specific __atomic operation in the general case. You + * only know the total effect of the combination of + * operations issued will adhere to the model. + * + * This limits how we can use the __atomic builtins. What + * we cannot use: + * + * 1. We cannot rely on __atomic_thread_fence() to issue + * any specific memory barriers at all. This regardless + * of memory model parameter passed. That is, we cannot + * use the __atomic_thread_fence() builtin at all. + * + * Why is this? If all __atomic builtins accessing + * memory issue memory barriers, __atomic_thread_fence() + * does not have to issue memory barriers. The + * implementation for the Itanium architecture is an + * example of this. Even using the __ATOMIC_RELAXED + * memory model all __atomic builtins accessing memory + * will issue memory barriers. Due to this no memory + * barriers at all will be issued by + * __atomic_thread_fence() using either one of the + * __ATOMIC_CONSUME, __ATOMIC_ACQUIRE, or + * __ATOMIC_RELEASE memory models. + * + * 2. We cannot rely on any __atomic builtin with the + * __ATOMIC_SEQ_CST memory model parameters to + * issue any specific memory barriers. That is, we + * cannot use these memory models at all. + * + * Why is this? Since all synchronizations is expected + * to be made using the __atomic builtins, memory + * barriers only have to be issued by some of them, + * and you do not know which ones wont issue memory + * barriers. + * + * One can easily be fooled into believing that when + * using the __ATOMIC_SEQ_CST memory model on all + * operations, all operations will issue full memory + * barriers. This is however not the case. The + * implementation for the x86_64 architecture is an + * example of this. Since all operations except loads + * issue full memory barriers, no memory barriers at + * all is issued by loads. This could also be + * implemented by issuing a full memory barrier on + * loads, but no barrier at all on stores. + * + * What can be used then? + * 1. All (legacy) __sync builtins implying full memory + * barriers issued. + * 2. All __atomic builtins using the __ATOMIC_RELAXED + * memory model can, of course, be used. This since + * no ordering guarantees at all are made. + * 3. All __atomic builtins accessing memory using the + * __ATOMIC_ACQUIRE and __ATOMIC_RELEASE memory + * models. This since an __atomic builtin memory + * access using the __ATOMIC_ACQUIRE must at least + * issue an aquire memory barrier and an __atomic + * builtin memory acess with the __ATOMIC_RELEASE + * memory model must at least issue a release memory + * barrier. Otherwise the two can not be paired. + * 4. All __atomic builtins accessing memory using the + * __ATOMIC_CONSUME builtin can be used for the same + * reason __ATOMIC_ACQUIRE can be used. The ethread + * atomic framework implementing the ethread API + * using native implementations does not expect the + * native implementations to produce versions with + * data dependent read barriers, so until the + * framework is changed we haven't got any use for + * for it. + * + * For some architectures we have our own memory barrier + * implementations. We prefer to use these since they + * should be as fine grained as possible. For other + * architectures we use the __sync_synchronize() builtin + * which issue a full memory barrier. For these + * architectures we have to assume that all loads and + * stores can be reordered without limitation. That is, + * unnecessary memory barriers will be issued if such + * reordering actually cannot occur. */ -#ifndef ETHR_GCC_MEMBAR_H__ -#define ETHR_GCC_MEMBAR_H__ +/* + * We prefer to use our own memory barrier implementation if + * such exist instead of using __sync_synchronize()... + */ +#if defined(__i386__) || defined(__x86_64__) +# include "../i386/ethr_membar.h" +#elif defined(__sparc__) +# include "../sparc32/ethr_membar.h" +#elif defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) +# include "../ppc32/ethr_membar.h" +#elif !defined(ETHR_GCC_ATOMIC_MEMBAR_H__) \ + && (ETHR_HAVE_GCC_ASM_ARM_DMB_INSTRUCTION \ + || ETHR_HAVE___sync_synchronize \ + || (ETHR_HAVE___sync_val_compare_and_swap & 12)) +#define ETHR_GCC_ATOMIC_MEMBAR_H__ #define ETHR_LoadLoad (1 << 0) #define ETHR_LoadStore (1 << 1) #define ETHR_StoreLoad (1 << 2) #define ETHR_StoreStore (1 << 3) +#define ETHR_COMPILER_BARRIER __asm__ __volatile__("" : : : "memory") + +#if ETHR_HAVE_GCC_ASM_ARM_DMB_INSTRUCTION + +static __inline__ __attribute__((__always_inline__)) void +ethr_full_fence__(void) +{ + __asm__ __volatile__("dmb sy" : : : "memory"); +} + +static __inline__ __attribute__((__always_inline__)) void +ethr_store_fence__(void) +{ + __asm__ __volatile__("dmb st" : : : "memory"); +} + +#define ETHR_MEMBAR(B) \ + ETHR_CHOOSE_EXPR((B) == ETHR_StoreStore, ethr_store_fence__(), ethr_full_fence__()) + +#elif ETHR_HAVE___sync_synchronize + +static __inline__ __attribute__((__always_inline__)) void +ethr_full_fence__(void) +{ + /* + * The compiler barriers are here to fix missing clobbers + * in __sync_synchronize() when using buggy LLVM + * implementation of __sync_synchronize(). They + * do not introduce any unnecessary overhead when used + * here, so we use them for all systems. + */ + ETHR_COMPILER_BARRIER; + __sync_synchronize(); + ETHR_COMPILER_BARRIER; +} + +#else /* !ETHR_HAVE___sync_synchronize */ + /* - * According to the documentation __sync_synchronize() will - * issue a full memory barrier. However, __sync_synchronize() - * is known to erroneously be a noop on at least some - * platforms with some gcc versions. This has suposedly been - * fixed in some gcc version, but we don't know from which - * version. Therefore, we only use it when it has been - * verified to work. Otherwise we use the workaround - * below. + * Buggy __sync_synchronize(); call __sync_val_compare_and_swap() + * instead which imply a full memory barrier (and hope that one + * isn't buggy too). */ -#if defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP32) +#if (ETHR_HAVE___sync_val_compare_and_swap & 4) # define ETHR_MB_T__ ethr_sint32_t -#elif defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64) +#elif (ETHR_HAVE___sync_val_compare_and_swap & 8) # define ETHR_MB_T__ ethr_sint64_t -#else -# error "No __sync_val_compare_and_swap" #endif -#define ETHR_SYNC_SYNCHRONIZE_WORKAROUND__ \ -do { \ - volatile ETHR_MB_T__ x___ = 0; \ - (void) __sync_val_compare_and_swap(&x___, (ETHR_MB_T__) 0, (ETHR_MB_T__) 1); \ -} while (0) -#define ETHR_COMPILER_BARRIER __asm__ __volatile__("" : : : "memory") +static __inline__ __attribute__((__always_inline__)) void +ethr_full_fence__(void) +{ + volatile ETHR_MB_T__ x = 0; + (void) __sync_val_compare_and_swap(&x, (ETHR_MB_T__) 0, (ETHR_MB_T__) 1); +} -#if defined(__mips__) && ETHR_AT_LEAST_GCC_VSN__(4, 2, 0) -# define ETHR_MEMBAR(B) __sync_synchronize() -# define ETHR_READ_DEPEND_MEMORY_BARRIER __sync_synchronize() -#elif ((defined(__powerpc__) || defined(__ppc__)) \ - && ETHR_AT_LEAST_GCC_VSN__(4, 1, 2)) -# define ETHR_MEMBAR(B) __sync_synchronize() -#else /* Use workaround */ -# define ETHR_MEMBAR(B) \ - ETHR_SYNC_SYNCHRONIZE_WORKAROUND__ -# define ETHR_READ_DEPEND_MEMORY_BARRIER \ - ETHR_SYNC_SYNCHRONIZE_WORKAROUND__ +#endif /* !ETHR_HAVE___sync_synchronize */ + +#ifndef ETHR_MEMBAR +# define ETHR_MEMBAR(B) ethr_full_fence__() #endif +/* + * Define ETHR_READ_DEPEND_MEMORY_BARRIER for all architechtures + * not known to order data dependent loads + */ + +#if !defined(__ia64__) && !defined(__arm__) +# define ETHR_READ_DEPEND_MEMORY_BARRIER ETHR_MEMBAR(ETHR_LoadLoad) +#endif -#endif /* ETHR_GCC_MEMBAR_H__ */ +#endif /* ETHR_GCC_ATOMIC_MEMBAR_H__ */ diff --git a/erts/include/internal/gcc/ethread.h b/erts/include/internal/gcc/ethread.h index 365a3535cf..be3e1da90e 100644 --- a/erts/include/internal/gcc/ethread.h +++ b/erts/include/internal/gcc/ethread.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2010-2011. All Rights Reserved. + * Copyright Ericsson AB 2010-2015. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -18,20 +18,292 @@ */ /* - * Description: Native atomic ethread support when using gcc + * Description: Native atomic ethread support when using gcc's __atomic + * and __sync builtins * Author: Rickard Green */ -#ifndef ETHREAD_GCC_H__ -#define ETHREAD_GCC_H__ - -#if defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP32) \ - || defined(ETHR_HAVE___SYNC_VAL_COMPARE_AND_SWAP64) +#if !defined(ETHREAD_GCC_NATIVE_H__) && ETHR_GCC_COMPILER +#define ETHREAD_GCC_NATIVE_H__ #ifndef ETHR_MEMBAR # include "ethr_membar.h" #endif +#define ETHR_GCC_VERSIONS_MASK__ 28 + +#undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ +#undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ +#undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ +#undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ +#undef ETHR_GCC_RELAXED_VERSIONS__ +#undef ETHR_GCC_RELAXED_MOD_VERSIONS__ +#undef ETHR_GCC_ACQB_VERSIONS__ +#undef ETHR_GCC_ACQB_MOD_VERSIONS__ +#undef ETHR_GCC_RELB_VERSIONS__ +#undef ETHR_GCC_RELB_MOD_VERSIONS__ +#undef ETHR_GCC_MB_MOD_VERSIONS__ + +/* + * True GNU GCCs before version 4.8 do not emit a memory barrier + * after the load in the __atomic_load_n(_, __ATOMIC_ACQUIRE) + * case (which is needed on most architectures). + */ +#undef ETHR___atomic_load_ACQUIRE_barrier_bug +#if ETHR_GCC_COMPILER != ETHR_GCC_COMPILER_TRUE +/* + * A gcc compatible compiler. We have no information + * about the existence of this bug, but we assume + * that it is not impossible that it could have + * been "inherited". Therefore, until we are certain + * that the bug does not exist, we assume that it + * does. + */ +# define ETHR___atomic_load_ACQUIRE_barrier_bug ETHR_GCC_VERSIONS_MASK__ +#elif !ETHR_AT_LEAST_GCC_VSN__(4, 8, 0) +/* True gcc of version < 4.8, i.e., bug exist... */ +# define ETHR___atomic_load_ACQUIRE_barrier_bug ETHR_GCC_VERSIONS_MASK__ +#else /* True gcc of version >= 4.8 */ +/* + * Sizes less than or equal to word size have been fixed, + * but double word size has not been fixed. + */ +# if ETHR_SIZEOF_PTR == 8 +# define ETHR___atomic_load_ACQUIRE_barrier_bug \ + (~(8|4) & ETHR_GCC_VERSIONS_MASK__) +# elif ETHR_SIZEOF_PTR == 4 +# define ETHR___atomic_load_ACQUIRE_barrier_bug \ + (~4 & ETHR_GCC_VERSIONS_MASK__) +# else +# error word size not supported +# endif +#endif + +#define ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ 0 +#define ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ 0 +#define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ 0 +#define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ 0 +#define ETHR_GCC_RELAXED_VERSIONS__ ETHR_GCC_VERSIONS_MASK__ +#define ETHR_GCC_RELAXED_MOD_VERSIONS__ ETHR_GCC_VERSIONS_MASK__ + +#if ETHR_TRUST_GCC_ATOMIC_BUILTINS_MEMORY_BARRIERS +# define ETHR_GCC_ACQB_VERSIONS__ ETHR_GCC_VERSIONS_MASK__ +# define ETHR_GCC_ACQB_MOD_VERSIONS__ ETHR_GCC_VERSIONS_MASK__ +# define ETHR_GCC_RELB_VERSIONS__ ETHR_GCC_VERSIONS_MASK__ +# define ETHR_GCC_RELB_MOD_VERSIONS__ ETHR_GCC_VERSIONS_MASK__ +#else +/* + * This is currently the default (on most platforms) since + * we've seen too many memory barrier bugs produced by gcc... + */ +# define ETHR_GCC_ACQB_VERSIONS__ 0 +# define ETHR_GCC_ACQB_MOD_VERSIONS__ 0 +# define ETHR_GCC_RELB_VERSIONS__ 0 +# define ETHR_GCC_RELB_MOD_VERSIONS__ 0 +#endif +/* + * In the general case we do not want any full barrier versions + * if we can implement more relaxed ones (using __atomic_* builtins). + * This since the implementations normally need extra memory barrier + * instructions to implement these. The x86/x86_64 implementations + * are an exception see below. + */ +#define ETHR_GCC_MB_MOD_VERSIONS__ \ + (ETHR_GCC_VERSIONS_MASK__ & ~ETHR_HAVE___atomic_compare_exchange_n) + +#if ETHR_SIZEOF_PTR == 8 +# define ETHR_GCC_VOLATILE_BIT_MASK__ 12 +#elif ETHR_SIZEOF_PTR == 4 +# define ETHR_GCC_VOLATILE_BIT_MASK__ 4 +#endif + +#if defined(__i386__) || defined(__x86_64__) || defined(__sparc__) \ + || defined(__powerpc__) || defined(__ppc__) || defined(__mips__) \ + || defined(__alpha__) || defined(__ia64__) + +/* + * Aligned volatile stores and loads of data smaller + * than or equal to word size are atomic... + */ +# undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ +# define ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ ETHR_GCC_VOLATILE_BIT_MASK__ +# undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ +# define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ ETHR_GCC_VOLATILE_BIT_MASK__ + +#elif defined(__arm__) + +/* volatile stores are problematic on some machines... */ +# undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ +# define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ ETHR_GCC_VOLATILE_BIT_MASK__ + +#endif + +#if defined(__ia64__) + +/* Volatile stores produce stores with release barriers. */ +# undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ +# define ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ ETHR_GCC_VOLATILE_BIT_MASK__ + +/* Volatile loads produce loads with acquire barrier. */ +# undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ +# define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ ETHR_GCC_VOLATILE_BIT_MASK__ + +/* + * We trust gcc to produce acquire/release barriers on itanium. + * Since all atomic ops also have at least acquire or release + * barriers (also when passed the relaxed memory model) it + * would be very inefficient not to use these as native + * barriers on Itanium. + */ +# undef ETHR_GCC_ACQB_VERSIONS__ +# define ETHR_GCC_ACQB_VERSIONS__ ETHR_GCC_VERSIONS_MASK__ +# undef ETHR_GCC_ACQB_MOD_VERSIONS__ +# define ETHR_GCC_ACQB_MOD_VERSIONS__ ETHR_GCC_VERSIONS_MASK__ +# undef ETHR_GCC_RELB_VERSIONS__ +# define ETHR_GCC_RELB_VERSIONS__ ETHR_GCC_VERSIONS_MASK__ +# undef ETHR_GCC_RELB_MOD_VERSIONS__ +# define ETHR_GCC_RELB_MOD_VERSIONS__ ETHR_GCC_VERSIONS_MASK__ + +/* + * Itanium is not effected by the load acquire + * bug since the barrier is part of the instruction + * on Itanium (ld.acq), and not a separate instruction + * as on most platforms. + */ +# undef ETHR___atomic_load_ACQUIRE_barrier_bug +# define ETHR___atomic_load_ACQUIRE_barrier_bug 0 + +/* + * No point exposing relaxed versions since they are + * implemended using either acquire or release + * barriers. + */ +# undef ETHR_GCC_RELAXED_VERSIONS__ +# define ETHR_GCC_RELAXED_VERSIONS__ 0 + +/* #endif defined(__ia64__) */ +#elif defined(__i386__) || defined(__x86_64__) + +/* + * Want full barrier versions of all modification + * operations since all of these are implemented + * using locked instructions implying full memory + * barriers. + */ +# undef ETHR_GCC_MB_MOD_VERSIONS__ +# define ETHR_GCC_MB_MOD_VERSIONS__ ETHR_HAVE___sync_val_compare_and_swap + +/* + * No point exposing acquire/release versions + * when we got full memory barrier versions + * of modification operations since all of these + * are implemented using locked instructions + * implying full memory barriers. + */ +# if ETHR_GCC_ACQB_MOD_VERSIONS__ +# undef ETHR_GCC_ACQB_MOD_VERSIONS__ +# define ETHR_GCC_ACQB_MOD_VERSIONS__ \ + (ETHR_GCC_VERSIONS_MASK__ & ~ETHR_HAVE___sync_val_compare_and_swap) +# endif +# if ETHR_GCC_RELB_MOD_VERSIONS__ +# undef ETHR_GCC_RELB_MOD_VERSIONS__ +# define ETHR_GCC_RELB_MOD_VERSIONS__ \ + (ETHR_GCC_VERSIONS_MASK__ & ~ETHR_HAVE___sync_val_compare_and_swap) +# endif + +# ifdef ETHR_X86_OUT_OF_ORDER + +/* See above... */ +# undef ETHR_GCC_RELAXED_MOD_VERSIONS__ +# define ETHR_GCC_RELAXED_MOD_VERSIONS__ 0 + +# else /* !ETHR_X86_OUT_OF_ORDER, i.e., we don't use any x86-OOO instructions... */ + +/* + * Not effected by the load acquire barrier bug, + * since no barrier at all is needed for a load + * acquire... + */ +# undef ETHR___atomic_load_ACQUIRE_barrier_bug +# define ETHR___atomic_load_ACQUIRE_barrier_bug 0 + +/* Stores imply release barriers semantics. */ +# undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ +# define ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ ETHR_GCC_VOLATILE_BIT_MASK__ + +/* Loads imply acquire barrier semantics. */ +# undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ +# define ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ ETHR_GCC_VOLATILE_BIT_MASK__ + +/* + * Trust load acquire and store release for sizes + * where volatile operation implies these barrier + * semantics since no barriers are needed. + */ +# if !ETHR_GCC_ACQB_VERSIONS__ +# undef ETHR_GCC_ACQB_VERSIONS__ +# define ETHR_GCC_ACQB_VERSIONS__ ETHR_GCC_VOLATILE_BIT_MASK__ +# endif +# if !ETHR_GCC_RELB_VERSIONS__ +# undef ETHR_GCC_RELB_VERSIONS__ +# define ETHR_GCC_RELB_VERSIONS__ ETHR_GCC_VOLATILE_BIT_MASK__ +# endif + +/* + * No point exposing relaxed versions at all since + * all mod operations are implemented with locked + * instructions implying full memory barriers and + * volatile store and load imply release and + * acquire barrier semantics. + */ +# undef ETHR_GCC_RELAXED_VERSIONS__ +# define ETHR_GCC_RELAXED_VERSIONS__ 0 + +# endif /* !ETHR_X86_OUT_OF_ORDER */ + +/* #endif defined(__i386__) || defined(__x86_64__) */ +#elif defined(__powerpc__) || defined(__ppc__) + +# if !defined(ETHR_PPC_HAVE_LWSYNC) +/* + * Release barriers are typically implemented using + * the lwsync instruction. We want our runtime + * configure test to determine if the lwsync + * instruction is available on the system or not + * before we use it. Therefore, do not implement any + * native ops using the __ATOMIC_RELEASE model. + */ +# undef ETHR_GCC_RELB_VERSIONS__ +# define ETHR_GCC_RELB_VERSIONS__ 0 +# if defined(ETHR_GCC_IMPLEMENT_ACQB_USING_LWSYNC) +/* + * Acquire barriers are usually implemented by other means + * than lwsync, but can be implemented using lwsync. Define + * ETHR_GCC_IMPLEMENT_ACQB_USING_LWSYNC if acquire barriers + * are implemented using lwsync. + */ +# undef ETHR_GCC_ACQB_VERSIONS__ +# define ETHR_GCC_ACQB_VERSIONS__ 0 +# endif +# endif + +#endif /* defined(__powerpc__) || defined(__ppc__) */ + +#if !ETHR_GCC_RELAXED_VERSIONS__ +# undef ETHR_GCC_RELAXED_MOD_VERSIONS__ +# define ETHR_GCC_RELAXED_MOD_VERSIONS__ 0 +#endif + +#if !ETHR_GCC_ACQB_VERSIONS__ +# undef ETHR_GCC_ACQB_MOD_VERSIONS__ +# define ETHR_GCC_ACQB_MOD_VERSIONS__ 0 +#endif + +#if !ETHR_GCC_RELB_VERSIONS__ +# undef ETHR_GCC_RELB_MOD_VERSIONS__ +# define ETHR_GCC_RELB_MOD_VERSIONS__ 0 +#endif + #if !defined(ETHR_HAVE_NATIVE_ATOMIC32) # define ETHR_ATOMIC_WANT_32BIT_IMPL__ # include "ethr_atomic.h" @@ -42,12 +314,51 @@ # include "ethr_atomic.h" #endif +#if defined(__x86_64__) +/* + * No instructions available for native implementation + * of these for dw-atomics... + */ +# undef ETHR_GCC_RELAXED_VERSIONS__ +# define ETHR_GCC_RELAXED_VERSIONS__ 0 +# undef ETHR_GCC_ACQB_VERSIONS__ +# define ETHR_GCC_ACQB_VERSIONS__ 0 +# undef ETHR_GCC_RELB_VERSIONS__ +# define ETHR_GCC_RELB_VERSIONS__ 0 +#endif + +#if !ETHR_GCC_RELAXED_VERSIONS__ +# undef ETHR_GCC_RELAXED_MOD_VERSIONS__ +# define ETHR_GCC_RELAXED_MOD_VERSIONS__ 0 +#endif + +#if !ETHR_GCC_ACQB_VERSIONS__ +# undef ETHR_GCC_ACQB_MOD_VERSIONS__ +# define ETHR_GCC_ACQB_MOD_VERSIONS__ 0 +#endif + +#if !ETHR_GCC_RELB_VERSIONS__ +# undef ETHR_GCC_RELB_MOD_VERSIONS__ +# define ETHR_GCC_RELB_MOD_VERSIONS__ 0 +#endif + #if (!defined(ETHR_HAVE_NATIVE_DW_ATOMIC) \ && !(ETHR_SIZEOF_PTR == 4 && defined(ETHR_HAVE_NATIVE_ATOMIC64)) \ && !(ETHR_SIZEOF_PTR == 8 && defined(ETHR_HAVE_NATIVE_ATOMIC128))) # include "ethr_dw_atomic.h" #endif -#endif +#undef ETHR___atomic_load_ACQUIRE_barrier_bug +#undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE__ +#undef ETHR_GCC_VOLATILE_STORE_IS_ATOMIC_STORE_RELB__ +#undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD__ +#undef ETHR_GCC_VOLATILE_LOAD_IS_ATOMIC_LOAD_ACQB__ +#undef ETHR_GCC_RELAXED_VERSIONS__ +#undef ETHR_GCC_RELB_VERSIONS__ +#undef ETHR_GCC_RELB_VERSIONS__ +#undef ETHR_GCC_RELAXED_MOD_VERSIONS__ +#undef ETHR_GCC_ACQB_MOD_VERSIONS__ +#undef ETHR_GCC_RELB_MOD_VERSIONS__ +#undef ETHR_GCC_MB_MOD_VERSIONS__ -#endif +#endif /* ETHREAD_GCC_NATIVE_H__ */ |