diff options
author | Rickard Green <[email protected]> | 2016-02-22 19:21:45 +0100 |
---|---|---|
committer | Rickard Green <[email protected]> | 2016-02-22 19:21:45 +0100 |
commit | d8699b0ca6a4e52bc04cb439d225da147161258c (patch) | |
tree | 07866379d83e4a2e037099e267de05b384b80c01 | |
parent | 167675dcb94e3809a224d3b4293594717cec48d9 (diff) | |
parent | 3ac4a18199b9f35224624851b01a0d3cb6e8e0e1 (diff) | |
download | otp-d8699b0ca6a4e52bc04cb439d225da147161258c.tar.gz otp-d8699b0ca6a4e52bc04cb439d225da147161258c.tar.bz2 otp-d8699b0ca6a4e52bc04cb439d225da147161258c.zip |
Merge branch 'maint'
* maint:
Improve cmpxchg8b/cmpxchg16b inline asm
Improve cmpxchg8b inline asm configure test
-rw-r--r-- | erts/aclocal.m4 | 162 | ||||
-rw-r--r-- | erts/include/internal/ethread_header_config.h.in | 4 | ||||
-rw-r--r-- | erts/include/internal/i386/ethr_dw_atomic.h | 73 |
3 files changed, 181 insertions, 58 deletions
diff --git a/erts/aclocal.m4 b/erts/aclocal.m4 index f6d8f20e4e..017fdbd589 100644 --- a/erts/aclocal.m4 +++ b/erts/aclocal.m4 @@ -2075,63 +2075,159 @@ esac case "$GCC-$host_cpu" in yes-i86pc | yes-i*86 | yes-x86_64 | yes-amd64) + + if test $ac_cv_sizeof_void_p = 4; then + dw_cmpxchg="cmpxchg8b" + else + dw_cmpxchg="cmpxchg16b" + fi + gcc_dw_cmpxchg_asm=no - AC_MSG_CHECKING([for gcc double word cmpxchg asm support]) - AC_TRY_COMPILE([], + gcc_pic_dw_cmpxchg_asm=no + gcc_cflags_pic=no + gcc_cmpxchg8b_pic_no_clobber_ebx=no + gcc_cmpxchg8b_pic_no_clobber_ebx_register_shortage=no + + save_CFLAGS="$CFLAGS" + + # Check if it works out of the box using passed CFLAGS + # and with -fPIC added to CFLAGS if the passed CFLAGS + # doesn't trigger position independent code + pic_cmpxchg=unknown + while true; do + + case $pic_cmpxchg in + yes) pic_text="pic ";; + *) pic_text="";; + esac + + AC_MSG_CHECKING([for gcc $pic_text$dw_cmpxchg plain asm support]) + + plain_cmpxchg=no + AC_TRY_COMPILE([], [ char xchgd; long new[2], xchg[2], *p; __asm__ __volatile__( -#if ETHR_SIZEOF_PTR == 4 && defined(__PIC__) && __PIC__ - "pushl %%ebx\n\t" - "movl %8, %%ebx\n\t" -#endif #if ETHR_SIZEOF_PTR == 4 "lock; cmpxchg8b %0\n\t" #else "lock; cmpxchg16b %0\n\t" #endif "setz %3\n\t" -#if ETHR_SIZEOF_PTR == 4 && defined(__PIC__) && __PIC__ - "popl %%ebx\n\t" -#endif - : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd) - : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "3"(new[1]), -#if ETHR_SIZEOF_PTR == 4 && defined(__PIC__) && __PIC__ - "r"(new[0]) -#else - "b"(new[0]) -#endif + : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=q"(xchgd) + : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "c"(new[1]), "b"(new[0]) : "cc", "memory"); + ], + [plain_cmpxchg=yes]) + AC_MSG_RESULT([$plain_cmpxchg]) + + if test $pic_cmpxchg = yes; then + gcc_pic_dw_cmpxchg_asm=$plain_cmpxchg + break + fi + + gcc_dw_cmpxchg_asm=$plain_cmpxchg + + # If not already compiling to position independent + # code add -fPIC to CFLAGS and do it again. This + # since we want also want to know how to compile + # to position independent code since this might + # cause problems with the use of the EBX register + # as input to the asm on 32-bit x86 and old gcc + # compilers (gcc vsn < 5). + + AC_TRY_COMPILE([], + [ +#if !defined(__PIC__) || !__PIC__ +# error no pic +#endif ], - [gcc_dw_cmpxchg_asm=yes]) - if test $gcc_dw_cmpxchg_asm = no && test $ac_cv_sizeof_void_p = 4; then + [pic_cmpxchg=yes + gcc_cflags_pic=yes], + [pic_cmpxchg=no]) + + if test $pic_cmpxchg = yes; then + gcc_pic_dw_cmpxchg_asm=$gcc_dw_cmpxchg_asm + break + fi + + CFLAGS="$save_CFLAGS -fPIC" + pic_cmpxchg=yes + + done + + if test $gcc_pic_dw_cmpxchg_asm = no && test $ac_cv_sizeof_void_p = 4; then + + AC_MSG_CHECKING([for gcc pic cmpxchg8b asm support with EBX workaround]) + + # Check if we can work around it by managing the ebx + # register explicitly in the asm... + AC_TRY_COMPILE([], + [ + char xchgd; + long new[2], xchg[2], *p; + __asm__ __volatile__( + "pushl %%ebx\n\t" + "movl %8, %%ebx\n\t" + "lock; cmpxchg8b %0\n\t" + "setz %3\n\t" + "popl %%ebx\n\t" + : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=q"(xchgd) + : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "c"(new[1]), "r"(new[0]) + : "cc", "memory"); + ], + [gcc_pic_dw_cmpxchg_asm=yes + gcc_cmpxchg8b_pic_no_clobber_ebx=yes]) + + AC_MSG_RESULT([$gcc_pic_dw_cmpxchg_asm]) + + if test $gcc_pic_dw_cmpxchg_asm = no; then + + AC_MSG_CHECKING([for gcc pic cmpxchg8b asm support with EBX and register shortage workarounds]) + # If no optimization is enabled we sometimes get a + # register shortage. Check if we can work around + # this... + + AC_TRY_COMPILE([], [ char xchgd; long new[2], xchg[2], *p; -#if !defined(__PIC__) || !__PIC__ -# error nope -#endif __asm__ __volatile__( - "pushl %%ebx\n\t" - "movl (%7), %%ebx\n\t" - "movl 4(%7), %%ecx\n\t" - "lock; cmpxchg8b %0\n\t" - "setz %3\n\t" - "popl %%ebx\n\t" - : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd) - : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "3"(new) + "pushl %%ebx\n\t" + "movl (%7), %%ebx\n\t" + "movl 4(%7), %%ecx\n\t" + "lock; cmpxchg8b %0\n\t" + "setz %3\n\t" + "popl %%ebx\n\t" + : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd) + : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "r"(new) : "cc", "memory"); ], - [gcc_dw_cmpxchg_asm=yes]) - if test "$gcc_dw_cmpxchg_asm" = "yes"; then - AC_DEFINE(ETHR_CMPXCHG8B_REGISTER_SHORTAGE, 1, [Define if you get a register shortage with cmpxchg8b and position independent code]) + [gcc_pic_dw_cmpxchg_asm=yes + gcc_cmpxchg8b_pic_no_clobber_ebx=yes + gcc_cmpxchg8b_pic_no_clobber_ebx_register_shortage=yes]) + + AC_MSG_RESULT([$gcc_pic_dw_cmpxchg_asm]) fi + + if test $gcc_cflags_pic = yes; then + gcc_dw_cmpxchg_asm=$gcc_pic_dw_cmpxchg_asm + fi + + fi + + CFLAGS="$save_CFLAGS" + + if test "$gcc_cmpxchg8b_pic_no_clobber_ebx" = "yes"; then + AC_DEFINE(ETHR_CMPXCHG8B_PIC_NO_CLOBBER_EBX, 1, [Define if gcc wont let you clobber ebx with cmpxchg8b and position independent code]) + fi + if test "$gcc_cmpxchg8b_pic_no_clobber_ebx_register_shortage" = "yes"; then + AC_DEFINE(ETHR_CMPXCHG8B_REGISTER_SHORTAGE, 1, [Define if you get a register shortage with cmpxchg8b and position independent code]) fi - AC_MSG_RESULT([$gcc_dw_cmpxchg_asm]) if test "$gcc_dw_cmpxchg_asm" = "yes"; then AC_DEFINE(ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT, 1, [Define if you use a gcc that supports the double word cmpxchg instruction]) fi;; diff --git a/erts/include/internal/ethread_header_config.h.in b/erts/include/internal/ethread_header_config.h.in index 9cabd0591a..f4b08cfced 100644 --- a/erts/include/internal/ethread_header_config.h.in +++ b/erts/include/internal/ethread_header_config.h.in @@ -166,6 +166,10 @@ /* Define if you use a gcc that supports the double word cmpxchg instruction */ #undef ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT +/* Define if gcc wont let you clobber ebx with cmpxchg8b and position + independent code */ +#undef ETHR_CMPXCHG8B_PIC_NO_CLOBBER_EBX + /* Define if you get a register shortage with cmpxchg8b and position independent code */ #undef ETHR_CMPXCHG8B_REGISTER_SHORTAGE diff --git a/erts/include/internal/i386/ethr_dw_atomic.h b/erts/include/internal/i386/ethr_dw_atomic.h index e8c4119ef0..5444a6345c 100644 --- a/erts/include/internal/i386/ethr_dw_atomic.h +++ b/erts/include/internal/i386/ethr_dw_atomic.h @@ -115,13 +115,19 @@ ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var) return (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var); } -#if ETHR_SIZEOF_PTR == 4 && defined(__PIC__) && __PIC__ +#if defined(ETHR_CMPXCHG8B_PIC_NO_CLOBBER_EBX) && defined(__PIC__) && __PIC__ +#if ETHR_SIZEOF_PTR != 4 +# error unexpected pic issue +#endif /* * When position independent code is used in 32-bit mode, the EBX register - * is used for storage of global offset table address, and we may not - * use it as input or output in an asm. We need to save and restore the - * EBX register explicitly (for some reason gcc doesn't provide this - * service to us). + * is used for storage of global offset table address. When compiling with + * an old gcc (< vsn 5) we may not use it as input or output in an inline + * asm. We then need to save and restore the EBX register explicitly (for + * some reason old gcc compilers didn't provide this service to us). + * ETHR_CMPXCHG8B_PIC_NO_CLOBBER_EBX will be defined if we need to + * explicitly manage EBX ourselves. + * */ # define ETHR_NO_CLOBBER_EBX__ 1 #else @@ -151,36 +157,53 @@ ethr_native_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var, ETHR_DW_DBG_ALIGNED__(p); +#if ETHR_NO_CLOBBER_EBX__ && ETHR_CMPXCHG8B_REGISTER_SHORTAGE + /* + * gcc wont let us use ebx as input and we + * get a register shortage + */ + __asm__ __volatile__( -#if ETHR_NO_CLOBBER_EBX__ "pushl %%ebx\n\t" -# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE "movl (%7), %%ebx\n\t" "movl 4(%7), %%ecx\n\t" -# else - "movl %8, %%ebx\n\t" -# endif -#endif - "lock; cmpxchg" ETHR_DW_CMPXCHG_SFX__ " %0\n\t" + "lock; cmpxchg8b %0\n\t" "setz %3\n\t" -#if ETHR_NO_CLOBBER_EBX__ "popl %%ebx\n\t" -#endif : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd) - : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), -#if ETHR_NO_CLOBBER_EBX__ -# if ETHR_CMPXCHG8B_REGISTER_SHORTAGE - "3"(new) -# else - "3"(new[1]), - "r"(new[0]) -# endif + : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "r"(new) + : "cc", "memory"); + +#elif ETHR_NO_CLOBBER_EBX__ + /* + * gcc wont let us use ebx as input + */ + + __asm__ __volatile__( + "pushl %%ebx\n\t" + "movl %8, %%ebx\n\t" + "lock; cmpxchg8b %0\n\t" + "setz %3\n\t" + "popl %%ebx\n\t" + : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=q"(xchgd) + : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "c"(new[1]), "r"(new[0]) + : "cc", "memory"); + #else - "3"(new[1]), - "b"(new[0]) -#endif + /* + * gcc lets us place values in the registers where + * we want them + */ + + __asm__ __volatile__( + "lock; cmpxchg" ETHR_DW_CMPXCHG_SFX__ " %0\n\t" + "setz %3\n\t" + : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=q"(xchgd) + : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "c"(new[1]), "b"(new[0]) : "cc", "memory"); +#endif + return (int) xchgd; } |