author    Rickard Green <[email protected]>  2011-01-02 10:03:54 +0100
committer Rickard Green <[email protected]>  2011-06-14 11:40:19 +0200
commit    7f19af0423934f85c74ccb75546e5e3a6b6d10e8 (patch)
tree      612d1010f37517f813a94d8a5f38cfd0126ce3f8 /erts/include/internal/ppc32
parent    4a5a75811e2cd590b5c94f71864a5245fd511ccf (diff)
Improve ethread atomics
The ethread atomics API now also provides double word size atomics. Double word size atomics are implemented using native atomic instructions on x86 (when the cmpxchg8b instruction is available) and on x86_64 (when the cmpxchg16b instruction is available). On other hardware where 32-bit or word size atomics are available, an optimized fallback is used; otherwise, a spinlock- or mutex-based fallback is used.

The ethread library now performs runtime tests for the presence of hardware features, such as SSE2 instructions, instead of requiring this to be determined at compile time.

There are now functions implementing each atomic operation with the following implied memory barrier semantics: none, read, write, acquire, release, and full. Some of the operation-barrier combinations aren't especially useful, but instead of filtering useful ones out, and potentially missing a useful one, we implement them all.

A much smaller set of functionality is now required of a native atomics implementation. More or less only cmpxchg and a membar macro need to be implemented for each atomic size; the other functions are automatically constructed from these. It is, of course, often wise to implement more than this where possible, from a performance perspective.
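The last paragraph is the heart of the change: given a native cmpxchg, every other operation can be synthesized. Below is a minimal sketch of that construction for add_return, in plain C; my_cmpxchg and my_add_return are illustrative names, not part of this commit, and GCC's __sync_val_compare_and_swap builtin stands in for a native cmpxchg:

    #include <stdint.h>

    /* Stand-in for a native cmpxchg; same argument order as ethread's
     * ethr_native_atomic32_cmpxchg(var, new, expected); returns the
     * previous value of *var. */
    static int32_t
    my_cmpxchg(volatile int32_t *var, int32_t new_val, int32_t expected)
    {
        return __sync_val_compare_and_swap(var, expected, new_val);
    }

    /* add_return synthesized from cmpxchg alone: retry until the swap
     * succeeds against an unchanged previous value. */
    static int32_t
    my_add_return(volatile int32_t *var, int32_t incr)
    {
        int32_t exp, act = *var;
        do {
            exp = act;
            act = my_cmpxchg(var, exp + incr, exp);
        } while (act != exp);
        return exp + incr;
    }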
Diffstat (limited to 'erts/include/internal/ppc32')
-rw-r--r--  erts/include/internal/ppc32/atomic.h       | 148
-rw-r--r--  erts/include/internal/ppc32/ethr_membar.h  |  63
-rw-r--r--  erts/include/internal/ppc32/ethread.h      |   5
-rw-r--r--  erts/include/internal/ppc32/rwlock.h       |  15
-rw-r--r--  erts/include/internal/ppc32/spinlock.h     |  10
5 files changed, 181 insertions(+), 60 deletions(-)
diff --git a/erts/include/internal/ppc32/atomic.h b/erts/include/internal/ppc32/atomic.h
index 522f433649..6001620677 100644
--- a/erts/include/internal/ppc32/atomic.h
+++ b/erts/include/internal/ppc32/atomic.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -29,27 +29,31 @@
#define ETHREAD_PPC_ATOMIC_H
#define ETHR_HAVE_NATIVE_ATOMIC32 1
+#define ETHR_NATIVE_ATOMIC32_IMPL "ethread"
typedef struct {
volatile ethr_sint32_t counter;
} ethr_native_atomic32_t;
-#define ETHR_MEMORY_BARRIER __asm__ __volatile__("sync" : : : "memory")
-
#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADDR 1
+
static ETHR_INLINE ethr_sint32_t *
ethr_native_atomic32_addr(ethr_native_atomic32_t *var)
{
return (ethr_sint32_t *) &var->counter;
}
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_SET 1
+
static ETHR_INLINE void
-ethr_native_atomic32_init(ethr_native_atomic32_t *var, ethr_sint32_t i)
+ethr_native_atomic32_set(ethr_native_atomic32_t *var, ethr_sint32_t i)
{
var->counter = i;
}
-#define ethr_native_atomic32_set(v, i) ethr_native_atomic32_init((v), (i))
+
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_READ 1
static ETHR_INLINE ethr_sint32_t
ethr_native_atomic32_read(ethr_native_atomic32_t *var)
@@ -57,57 +61,68 @@ ethr_native_atomic32_read(ethr_native_atomic32_t *var)
return var->counter;
}
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN 1
+
static ETHR_INLINE ethr_sint32_t
ethr_native_atomic32_add_return(ethr_native_atomic32_t *var, ethr_sint32_t incr)
{
ethr_sint32_t tmp;
__asm__ __volatile__(
- "eieio\n\t"
"1:\t"
"lwarx %0,0,%1\n\t"
"add %0,%2,%0\n\t"
"stwcx. %0,0,%1\n\t"
"bne- 1b\n\t"
- "isync"
: "=&r"(tmp)
: "r"(&var->counter), "r"(incr)
: "cc", "memory");
return tmp;
}
-static ETHR_INLINE void
-ethr_native_atomic32_add(ethr_native_atomic32_t *var, ethr_sint32_t incr)
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_ADD_RETURN_ACQB 1
+
+static ETHR_INLINE ethr_sint32_t
+ethr_native_atomic32_add_return_acqb(ethr_native_atomic32_t *var, ethr_sint32_t incr)
{
- /* XXX: could use weaker version here w/o eieio+isync */
- (void)ethr_native_atomic32_add_return(var, incr);
+ ethr_sint32_t res;
+ res = ethr_native_atomic32_add_return(var, incr);
+ __asm__ __volatile("isync\n\t" : : : "memory");
+ return res;
}
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_INC_RETURN 1
+
static ETHR_INLINE ethr_sint32_t
ethr_native_atomic32_inc_return(ethr_native_atomic32_t *var)
{
ethr_sint32_t tmp;
__asm__ __volatile__(
- "eieio\n\t"
"1:\t"
"lwarx %0,0,%1\n\t"
"addic %0,%0,1\n\t" /* due to addi's (rA|0) behaviour */
"stwcx. %0,0,%1\n\t"
"bne- 1b\n\t"
- "isync"
: "=&r"(tmp)
: "r"(&var->counter)
: "cc", "memory");
return tmp;
}
-static ETHR_INLINE void
-ethr_native_atomic32_inc(ethr_native_atomic32_t *var)
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_INC_RETURN_ACQB 1
+
+static ETHR_INLINE ethr_sint32_t
+ethr_native_atomic32_inc_return_acqb(ethr_native_atomic32_t *var)
{
- /* XXX: could use weaker version here w/o eieio+isync */
- (void)ethr_native_atomic32_inc_return(var);
+ ethr_sint32_t res;
+ res = ethr_native_atomic32_inc_return(var);
+ __asm__ __volatile("isync\n\t" : : : "memory");
+ return res;
}
+
+
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_DEC_RETURN 1
static ETHR_INLINE ethr_sint32_t
ethr_native_atomic32_dec_return(ethr_native_atomic32_t *var)
@@ -115,82 +130,120 @@ ethr_native_atomic32_dec_return(ethr_native_atomic32_t *var)
ethr_sint32_t tmp;
__asm__ __volatile__(
- "eieio\n\t"
"1:\t"
"lwarx %0,0,%1\n\t"
"addic %0,%0,-1\n\t"
"stwcx. %0,0,%1\n\t"
"bne- 1b\n\t"
- "isync"
: "=&r"(tmp)
: "r"(&var->counter)
: "cc", "memory");
return tmp;
}
-static ETHR_INLINE void
-ethr_native_atomic32_dec(ethr_native_atomic32_t *var)
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_DEC_RETURN_ACQB 1
+
+static ETHR_INLINE ethr_sint32_t
+ethr_native_atomic32_dec_return_acqb(ethr_native_atomic32_t *var)
{
- /* XXX: could use weaker version here w/o eieio+isync */
- (void)ethr_native_atomic32_dec_return(var);
+ ethr_sint32_t res;
+ res = ethr_native_atomic32_dec_return(var);
+ __asm__ __volatile("isync\n\t" : : : "memory");
+ return res;
}
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_AND_RETOLD 1
+
static ETHR_INLINE ethr_sint32_t
ethr_native_atomic32_and_retold(ethr_native_atomic32_t *var, ethr_sint32_t mask)
{
ethr_sint32_t old, new;
__asm__ __volatile__(
- "eieio\n\t"
"1:\t"
"lwarx %0,0,%2\n\t"
"and %1,%0,%3\n\t"
"stwcx. %1,0,%2\n\t"
"bne- 1b\n\t"
- "isync"
: "=&r"(old), "=&r"(new)
: "r"(&var->counter), "r"(mask)
: "cc", "memory");
return old;
}
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_AND_RETOLD_ACQB 1
+
+static ETHR_INLINE ethr_sint32_t
+ethr_native_atomic32_and_retold_acqb(ethr_native_atomic32_t *var, ethr_sint32_t mask)
+{
+ ethr_sint32_t res;
+ res = ethr_native_atomic32_and_retold(var, mask);
+ __asm__ __volatile("isync\n\t" : : : "memory");
+ return res;
+}
+
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_OR_RETOLD 1
+
static ETHR_INLINE ethr_sint32_t
ethr_native_atomic32_or_retold(ethr_native_atomic32_t *var, ethr_sint32_t mask)
{
ethr_sint32_t old, new;
__asm__ __volatile__(
- "eieio\n\t"
"1:\t"
"lwarx %0,0,%2\n\t"
"or %1,%0,%3\n\t"
"stwcx. %1,0,%2\n\t"
"bne- 1b\n\t"
- "isync"
: "=&r"(old), "=&r"(new)
: "r"(&var->counter), "r"(mask)
: "cc", "memory");
return old;
}
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_OR_RETOLD_ACQB 1
+
+static ETHR_INLINE ethr_sint32_t
+ethr_native_atomic32_or_retold_acqb(ethr_native_atomic32_t *var, ethr_sint32_t mask)
+{
+ ethr_sint32_t res;
+ res = ethr_native_atomic32_or_retold(var, mask);
+ __asm__ __volatile("isync\n\t" : : : "memory");
+ return res;
+}
+
+
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_XCHG 1
+
static ETHR_INLINE ethr_sint32_t
ethr_native_atomic32_xchg(ethr_native_atomic32_t *var, ethr_sint32_t val)
{
ethr_sint32_t tmp;
__asm__ __volatile__(
- "eieio\n\t"
"1:\t"
"lwarx %0,0,%1\n\t"
"stwcx. %2,0,%1\n\t"
"bne- 1b\n\t"
- "isync"
: "=&r"(tmp)
: "r"(&var->counter), "r"(val)
: "cc", "memory");
return tmp;
}
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_XCHG_ACQB 1
+
+static ETHR_INLINE ethr_sint32_t
+ethr_native_atomic32_xchg_acqb(ethr_native_atomic32_t *var, ethr_sint32_t val)
+{
+ ethr_sint32_t res;
+ res = ethr_native_atomic32_xchg(var, val);
+ __asm__ __volatile("isync\n\t" : : : "memory");
+ return res;
+}
+
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG 1
+
static ETHR_INLINE ethr_sint32_t
ethr_native_atomic32_cmpxchg(ethr_native_atomic32_t *var,
ethr_sint32_t new,
@@ -199,14 +252,12 @@ ethr_native_atomic32_cmpxchg(ethr_native_atomic32_t *var,
ethr_sint32_t old;
__asm__ __volatile__(
- "eieio\n\t"
"1:\t"
"lwarx %0,0,%2\n\t"
"cmpw 0,%0,%3\n\t"
"bne 2f\n\t"
"stwcx. %1,0,%2\n\t"
"bne- 1b\n\t"
- "isync\n"
"2:"
: "=&r"(old)
: "r"(new), "r"(&var->counter), "r"(expected)
@@ -215,25 +266,30 @@ ethr_native_atomic32_cmpxchg(ethr_native_atomic32_t *var,
return old;
}
-/*
- * Atomic ops with at least specified barriers.
- */
+#define ETHR_HAVE_ETHR_NATIVE_ATOMIC32_CMPXCHG_ACQB 1
-static ETHR_INLINE long
-ethr_native_atomic32_read_acqb(ethr_native_atomic32_t *var)
+static ETHR_INLINE ethr_sint32_t
+ethr_native_atomic32_cmpxchg_acqb(ethr_native_atomic32_t *var,
+ ethr_sint32_t new,
+ ethr_sint32_t expected)
{
- long res = ethr_native_atomic32_read(var);
- ETHR_MEMORY_BARRIER;
- return res;
-}
+ ethr_sint32_t old;
-#define ethr_native_atomic32_set_relb ethr_native_atomic32_xchg
-#define ethr_native_atomic32_inc_return_acqb ethr_native_atomic32_inc_return
-#define ethr_native_atomic32_dec_relb ethr_native_atomic32_dec_return
-#define ethr_native_atomic32_dec_return_relb ethr_native_atomic32_dec_return
+ __asm__ __volatile__(
+ "1:\t"
+ "lwarx %0,0,%2\n\t"
+ "cmpw 0,%0,%3\n\t"
+ "bne 2f\n\t"
+ "stwcx. %1,0,%2\n\t"
+ "bne- 1b\n\t"
+ "isync\n"
+ "2:"
+ : "=&r"(old)
+ : "r"(new), "r"(&var->counter), "r"(expected)
+ : "cc", "memory");
-#define ethr_native_atomic32_cmpxchg_acqb ethr_native_atomic32_cmpxchg
-#define ethr_native_atomic32_cmpxchg_relb ethr_native_atomic32_cmpxchg
+ return old;
+}
#endif /* ETHR_TRY_INLINE_FUNCS */
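To make the acquire/release pairing above concrete, here is a toy test-and-set lock built only from operations this file defines. A hedged sketch, assuming the ethread internal headers are reachable (the include path is simplified, and my_lock_t and its functions are illustrative, not part of this commit):

    #include "ethread.h"  /* pulls in ethr_membar.h and atomic.h above */

    typedef struct {
        ethr_native_atomic32_t flag;  /* 0 = unlocked, 1 = locked */
    } my_lock_t;

    static void
    my_lock_init(my_lock_t *l)
    {
        ethr_native_atomic32_set(&l->flag, 0);
    }

    static void
    my_lock_acquire(my_lock_t *l)
    {
        /* cmpxchg_acqb: on success, the trailing isync keeps the
         * critical section's accesses after the lock acquisition */
        while (ethr_native_atomic32_cmpxchg_acqb(&l->flag, 1, 0) != 0)
            ; /* spin */
    }

    static void
    my_lock_release(my_lock_t *l)
    {
        /* release: order all prior loads/stores before the clearing
         * store, mirroring the unlock paths in spinlock.h below */
        ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);
        ethr_native_atomic32_set(&l->flag, 0);
    }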
diff --git a/erts/include/internal/ppc32/ethr_membar.h b/erts/include/internal/ppc32/ethr_membar.h
new file mode 100644
index 0000000000..ff5cc86bfb
--- /dev/null
+++ b/erts/include/internal/ppc32/ethr_membar.h
@@ -0,0 +1,63 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Memory barriers for PowerPC
+ * Author: Rickard Green
+ */
+
+#ifndef ETHR_PPC_MEMBAR_H__
+#define ETHR_PPC_MEMBAR_H__
+
+#define ETHR_LoadLoad (1 << 0)
+#define ETHR_LoadStore (1 << 1)
+#define ETHR_StoreLoad (1 << 2)
+#define ETHR_StoreStore (1 << 3)
+
+static __inline__ void
+ethr_lwsync__(void)
+{
+#ifdef ETHR_PPC_HAVE_NO_LWSYNC
+ __asm__ __volatile__ ("sync\n\t" : : : "memory");
+#else
+#ifndef ETHR_PPC_HAVE_LWSYNC
+ if (ETHR_PPC_RUNTIME_CONF_HAVE_NO_LWSYNC__)
+ __asm__ __volatile__ ("sync\n\t" : : : "memory");
+ else
+#endif
+ __asm__ __volatile__ ("lwsync\n\t" : : : "memory");
+#endif
+}
+
+static __inline__ void
+ethr_sync__(void)
+{
+ __asm__ __volatile__ ("sync\n\t" : : : "memory");
+}
+
+/*
+ * According to the "memory barrier instructions" section of
+ * http://www.ibm.com/developerworks/systems/articles/powerpc.html
+ * we want to use sync when a StoreLoad is needed and lwsync for
+ * everything else.
+ */
+#define ETHR_MEMBAR(B) \
+ ETHR_CHOOSE_EXPR((B) & ETHR_StoreLoad, ethr_sync__(), ethr_lwsync__())
+
+#endif
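Since the barrier-type argument is a compile-time constant at every call site, ETHR_CHOOSE_EXPR (defined elsewhere in the ethread headers) reduces each ETHR_MEMBAR use to a single instruction. An illustrative sketch of the mapping; membar_examples is not part of this commit:

    static void
    membar_examples(void)
    {
        ETHR_MEMBAR(ETHR_LoadLoad);                   /* lwsync */
        ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);  /* lwsync */
        ETHR_MEMBAR(ETHR_StoreLoad);                  /* sync */
        ETHR_MEMBAR(ETHR_StoreLoad|ETHR_LoadLoad);    /* sync: any
                                                       * StoreLoad bit
                                                       * forces sync */
    }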
diff --git a/erts/include/internal/ppc32/ethread.h b/erts/include/internal/ppc32/ethread.h
index 3b619e9d01..e41c83c5da 100644
--- a/erts/include/internal/ppc32/ethread.h
+++ b/erts/include/internal/ppc32/ethread.h
@@ -24,12 +24,9 @@
#ifndef ETHREAD_PPC32_ETHREAD_H
#define ETHREAD_PPC32_ETHREAD_H
+#include "ethr_membar.h"
#include "atomic.h"
#include "spinlock.h"
#include "rwlock.h"
-#define ETHR_HAVE_NATIVE_ATOMICS 1
-#define ETHR_HAVE_NATIVE_SPINLOCKS 1
-#define ETHR_HAVE_NATIVE_RWSPINLOCKS 1
-
#endif /* ETHREAD_PPC32_ETHREAD_H */
diff --git a/erts/include/internal/ppc32/rwlock.h b/erts/include/internal/ppc32/rwlock.h
index 19ec26ab68..311f000b69 100644
--- a/erts/include/internal/ppc32/rwlock.h
+++ b/erts/include/internal/ppc32/rwlock.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -23,12 +23,14 @@
*
* Based on the examples in Appendix E of Motorola's
* "Programming Environments Manual For 32-Bit Implementations
- * of the PowerPC Architecture". Uses eieio instead of sync
- * in the unlock sequence, as suggested in the manual.
+ * of the PowerPC Architecture".
*/
#ifndef ETHREAD_PPC_RWLOCK_H
#define ETHREAD_PPC_RWLOCK_H
+#define ETHR_HAVE_NATIVE_RWSPINLOCKS 1
+#define ETHR_NATIVE_RWSPINLOCK_IMPL "ethread"
+
/* Unlocked if zero, read-locked if negative, write-locked if +1. */
typedef struct {
volatile int lock;
@@ -47,9 +49,10 @@ ethr_native_read_unlock(ethr_native_rwlock_t *lock)
{
int tmp;
- /* this is eieio + ethr_native_atomic_inc() - isync */
+ ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);
+
+ /* this is ethr_native_atomic_inc() - isync */
__asm__ __volatile__(
- "eieio\n\t"
"1:\t"
"lwarx %0,0,%1\n\t"
"addic %0,%0,1\n\t"
@@ -105,7 +108,7 @@ ethr_native_read_lock(ethr_native_rwlock_t *lock)
static ETHR_INLINE void
ethr_native_write_unlock(ethr_native_rwlock_t *lock)
{
- __asm__ __volatile__("eieio" : : : "memory");
+ ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);
lock->lock = 0;
}
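Note the symmetry with atomic.h: the _acqb operations there place isync after the atomic instruction sequence (acquire), while read_unlock here places the barrier before it (release). A standalone sketch of that release-increment, assuming a PPC32 target where lwsync is available; the hypothetical my_release_inc is not part of this commit:

    static __inline__ void
    my_release_inc(volatile int *p)
    {
        int tmp;
        /* release barrier: complete the critical section first */
        __asm__ __volatile__("lwsync" : : : "memory");
        /* then the plain (barrier-free) atomic increment */
        __asm__ __volatile__(
            "1:\t"
            "lwarx %0,0,%1\n\t"
            "addic %0,%0,1\n\t"
            "stwcx. %0,0,%1\n\t"
            "bne- 1b"
            : "=&r"(tmp)
            : "r"(p)
            : "cc", "memory");
    }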
diff --git a/erts/include/internal/ppc32/spinlock.h b/erts/include/internal/ppc32/spinlock.h
index c8460a3e8a..4c95ec9efb 100644
--- a/erts/include/internal/ppc32/spinlock.h
+++ b/erts/include/internal/ppc32/spinlock.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -23,12 +23,14 @@
*
* Based on the examples in Appendix E of Motorola's
* "Programming Environments Manual For 32-Bit Implementations
- * of the PowerPC Architecture". Uses eieio instead of sync
- * in the unlock sequence, as suggested in the manual.
+ * of the PowerPC Architecture".
*/
#ifndef ETHREAD_PPC_SPINLOCK_H
#define ETHREAD_PPC_SPINLOCK_H
+#define ETHR_HAVE_NATIVE_SPINLOCKS 1
+#define ETHR_NATIVE_SPINLOCK_IMPL "ethread"
+
/* Unlocked if zero, locked if non-zero. */
typedef struct {
volatile unsigned int lock;
@@ -45,7 +47,7 @@ ethr_native_spinlock_init(ethr_native_spinlock_t *lock)
static ETHR_INLINE void
ethr_native_spin_unlock(ethr_native_spinlock_t *lock)
{
- __asm__ __volatile__("eieio" : : : "memory");
+ ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore);
lock->lock = 0;
}
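Both unlock paths were changed for the same reason: for ordinary cacheable memory, eieio orders only stores against stores, so a load inside the critical section could still be performed after the unlocking store became visible. ETHR_MEMBAR(ETHR_LoadStore|ETHR_StoreStore) maps to lwsync (or sync on hardware without it), which closes that hole. A minimal sketch of the resulting release store, assuming lwsync is available; my_release_store is illustrative, not part of this commit:

    static __inline__ void
    my_release_store(volatile unsigned int *p)
    {
        /* lwsync: prior loads and stores complete before the store */
        __asm__ __volatile__("lwsync" : : : "memory");
        *p = 0;  /* publish the unlocked state */
    }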