aboutsummaryrefslogtreecommitdiffstats
path: root/erts/include/internal/win/ethr_membar.h
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2011-01-02 10:03:54 +0100
committerRickard Green <[email protected]>2011-06-14 11:40:19 +0200
commit7f19af0423934f85c74ccb75546e5e3a6b6d10e8 (patch)
tree612d1010f37517f813a94d8a5f38cfd0126ce3f8 /erts/include/internal/win/ethr_membar.h
parent4a5a75811e2cd590b5c94f71864a5245fd511ccf (diff)
downloadotp-7f19af0423934f85c74ccb75546e5e3a6b6d10e8.tar.gz
otp-7f19af0423934f85c74ccb75546e5e3a6b6d10e8.tar.bz2
otp-7f19af0423934f85c74ccb75546e5e3a6b6d10e8.zip
Improve ethread atomics
The ethread atomics API now also provide double word size atomics. Double word size atomics are implemented using native atomic instructions on x86 (when the cmpxchg8b instruction is available) and on x86_64 (when the cmpxchg16b instruction is available). On other hardware where 32-bit atomics or word size atomics are available, an optimized fallback is used; otherwise, a spinlock, or a mutex based fallback is used. The ethread library now performs runtime tests for presence of hardware features, such as for example SSE2 instructions, instead of requiring this to be determined at compile time. There are now functions implementing each atomic operation with the following implied memory barrier semantics: none, read, write, acquire, release, and full. Some of the operation-barrier combinations aren't especially useful. But instead of filtering useful ones out, and potentially miss a useful one, we implement them all. A much smaller set of functionality for native atomics are required to be implemented than before. More or less only cmpxchg and a membar macro are required to be implemented for each atomic size. Other functions will automatically be constructed from these. It is, of course, often wise to implement more that this if possible from a performance perspective.
Diffstat (limited to 'erts/include/internal/win/ethr_membar.h')
-rw-r--r--erts/include/internal/win/ethr_membar.h145
1 files changed, 145 insertions, 0 deletions
diff --git a/erts/include/internal/win/ethr_membar.h b/erts/include/internal/win/ethr_membar.h
new file mode 100644
index 0000000000..8237660b2c
--- /dev/null
+++ b/erts/include/internal/win/ethr_membar.h
@@ -0,0 +1,145 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Memory barriers for Windows
+ * Author: Rickard Green
+ */
+
+#if (!defined(ETHR_WIN_MEMBAR_H__) \
+ && (defined(_MSC_VER) && _MSC_VER >= 1400) \
+ && (defined(_M_AMD64) \
+ || defined(_M_IA64) \
+ || defined(ETHR_HAVE__INTERLOCKEDCOMPAREEXCHANGE)))
+#define ETHR_WIN_MEMBAR_H__
+
+#define ETHR_LoadLoad (1 << 0)
+#define ETHR_LoadStore (1 << 1)
+#define ETHR_StoreLoad (1 << 2)
+#define ETHR_StoreStore (1 << 3)
+
+#include <intrin.h>
+#undef ETHR_COMPILER_BARRIER
+#define ETHR_COMPILER_BARRIER _ReadWriteBarrier()
+#pragma intrinsic(_ReadWriteBarrier)
+
+#pragma intrinsic(_InterlockedCompareExchange)
+
+#define ETHR_MB_USING_INTERLOCKED__ \
+do { \
+ volatile long x___ = 0; \
+ (void) _InterlockedCompareExchange(&x___, 2, 1); \
+} while (0)
+
+#if defined(_M_IA64)
+
+#define ETHR_MEMBAR(B) __mf()
+
+#elif defined(_M_AMD64) || defined(_M_IX86)
+
+#include <emmintrin.h>
+#include <mmintrin.h>
+
+#if ETHR_SIZEOF_PTR == 4
+# define ETHR_NO_SSE2_MB__ ETHR_MB_USING_INTERLOCKED__
+#endif
+#pragma intrinsic(_mm_mfence)
+#pragma intrinsic(_mm_sfence)
+#pragma intrinsic(_mm_lfence)
+
+static __forceinline void
+ethr_cfence__(void)
+{
+ _ReadWriteBarrier();
+}
+
+static __forceinline void
+ethr_mfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ ETHR_NO_SSE2_MB__;
+ else
+#endif
+ _mm_mfence();
+}
+
+static __forceinline void
+ethr_sfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ ETHR_NO_SSE2_MB__;
+ else
+#endif
+ _mm_sfence();
+}
+
+static __forceinline void
+ethr_lfence__(void)
+{
+#if ETHR_SIZEOF_PTR == 4
+ if (ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__)
+ ETHR_NO_SSE2_MB__;
+ else
+#endif
+ _mm_lfence();
+}
+
+#define ETHR_X86_OUT_OF_ORDER_MEMBAR(B) \
+ ETHR_CHOOSE_EXPR((B) == ETHR_StoreStore, \
+ ethr_sfence__(), \
+ ETHR_CHOOSE_EXPR((B) == ETHR_LoadLoad, \
+ ethr_lfence__(), \
+ ethr_mfence__()))
+
+#ifdef ETHR_X86_OUT_OF_ORDER
+
+#define ETHR_MEMBAR(B) \
+ ETHR_X86_OUT_OF_ORDER_MEMBAR((B))
+
+#else /* !ETHR_X86_OUT_OF_ORDER (the default) */
+
+/*
+ * We assume that only stores before loads may be reordered. That is,
+ * we assume that *no* instructions like these are used:
+ * - CLFLUSH,
+ * - streaming stores executed with non-temporal move,
+ * - string operations, or
+ * - other instructions which aren't LoadLoad, LoadStore, and StoreStore
+ * ordered by themselves
+ * If such instructions are used, either insert memory barriers
+ * using ETHR_X86_OUT_OF_ORDER_MEMBAR() at appropriate places, or
+ * define ETHR_X86_OUT_OF_ORDER. For more info see Intel 64 and IA-32
+ * Architectures Software Developer's Manual; Vol 3A; Chapter 8.2.2.
+ */
+
+#define ETHR_MEMBAR(B) \
+ ETHR_CHOOSE_EXPR((B) & ETHR_StoreLoad, ethr_mfence__(), ethr_cfence__())
+
+#endif
+
+#else /* No knowledge about platform; use interlocked fallback */
+
+#define ETHR_MEMBAR(B) ETHR_MB_USING_INTERLOCKED__
+#define ETHR_READ_DEPEND_MEMORY_BARRIER ETHR_MB_USING_INTERLOCKED__
+
+#endif
+
+#endif /* ETHR_WIN_MEMBAR_H__ */