From: Noah Misch
Date: Sat, 19 Oct 2019 03:20:52 +0000 (-0700)
Subject: For all ppc compilers, implement compare_exchange and fetch_add with asm.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=30ee5d17c20dbb282a9952b3048d6ad52d56c371;p=postgresql

For all ppc compilers, implement compare_exchange and fetch_add with asm.

This is more like how we handle s_lock.h and arch-x86.h.

Reviewed by Tom Lane.

Discussion: https://postgr.es/m/20191005173400.GA3979129@rfd.leadboat.com
---

diff --git a/configure b/configure
index 74627a7eed..02a905bfb5 100755
--- a/configure
+++ b/configure
@@ -14517,6 +14517,46 @@ $as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
 
 $as_echo "#define HAVE_PPC_LWARX_MUTEX_HINT 1" >>confdefs.h
 
+  fi
+  # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance" >&5
+$as_echo_n "checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance... " >&6; }
+if ${pgac_cv_have_i_constraint__builtin_constant_p+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+static inline int
+    addi(int ra, int si)
+    {
+        int res = 0;
+        if (__builtin_constant_p(si))
+            __asm__ __volatile__(
+                "   addi %0,%1,%2\n" : "=r"(res) : "b"(ra), "i"(si));
+        return res;
+    }
+    int test_adds(int x) { return addi(3, x) + addi(x, 5); }
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv_have_i_constraint__builtin_constant_p=yes
+else
+  pgac_cv_have_i_constraint__builtin_constant_p=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_i_constraint__builtin_constant_p" >&5
+$as_echo "$pgac_cv_have_i_constraint__builtin_constant_p" >&6; }
+  if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+
+$as_echo "#define HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P 1" >>confdefs.h
+
+  fi
 ;;
   esac
diff --git a/configure.in b/configure.in
index 0d16c1a971..88d3a59506 100644
--- a/configure.in
+++ b/configure.in
@@ -1539,6 +1539,26 @@ case $host_cpu in
     if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
     AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler supports PPC's LWARX mutex hint bit.])
     fi
+    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    AC_CACHE_CHECK([whether __builtin_constant_p(x) implies "i"(x) acceptance],
+                   [pgac_cv_have_i_constraint__builtin_constant_p],
+    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+    [static inline int
+     addi(int ra, int si)
+     {
+         int res = 0;
+         if (__builtin_constant_p(si))
+             __asm__ __volatile__(
+                 "   addi %0,%1,%2\n" : "=r"(res) : "b"(ra), "i"(si));
+         return res;
+     }
+     int test_adds(int x) { return addi(3, x) + addi(x, 5); }], [])],
+    [pgac_cv_have_i_constraint__builtin_constant_p=yes],
+    [pgac_cv_have_i_constraint__builtin_constant_p=no])])
+    if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+      AC_DEFINE(HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P, 1,
+                [Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance.])
+    fi
 ;;
   esac
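
For illustration (not part of the patch), the probe reduces to the following
standalone program.  It only has to compile: a ppc compiler that accepts it
gets HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P.  After inlining addi(x, 5), "si"
is a compile-time constant, so the "i" (immediate) constraint must be honored
under the __builtin_constant_p() guard; addi(3, x) takes the non-asm path
because there "si" is not constant.

    static inline int
    addi(int ra, int si)
    {
        int res = 0;

        if (__builtin_constant_p(si))
            __asm__ __volatile__(
                "   addi %0,%1,%2\n" : "=r"(res) : "b"(ra), "i"(si));
        return res;
    }

    /* forces both the constant-si and variable-si instantiations */
    int test_adds(int x) { return addi(3, x) + addi(x, 5); }

    int main(void) { return 0; }
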
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 53e6fe3398..939245db39 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -329,6 +329,9 @@
 /* Define to 1 if you have isinf(). */
 #undef HAVE_ISINF
 
+/* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */
+#undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+
 /* Define to 1 if you have the <langinfo.h> header file. */
 #undef HAVE_LANGINFO_H
 
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index fab66f8e98..363e1df4bf 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -87,14 +87,11 @@
  * using compiler intrinsics are a good idea.
  */
 /*
- * Given a gcc-compatible xlc compiler, prefer the xlc implementation.  The
- * ppc64le "IBM XL C/C++ for Linux, V13.1.2" implements both interfaces, but
- * __sync_lock_test_and_set() of one-byte types elicits SIGSEGV.
+ * gcc or compatible, including clang and icc.  Exclude xlc.  The ppc64le "IBM
+ * XL C/C++ for Linux, V13.1.2" emulates gcc, but __sync_lock_test_and_set()
+ * of one-byte types elicits SIGSEGV.
  */
-#if defined(__IBMC__) || defined(__IBMCPP__)
-#include "port/atomics/generic-xlc.h"
-/* gcc or compatible, including clang and icc */
-#elif defined(__GNUC__) || defined(__INTEL_COMPILER)
+#if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && !(defined(__IBMC__) || defined(__IBMCPP__))
 #include "port/atomics/generic-gcc.h"
 #elif defined(_MSC_VER)
 #include "port/atomics/generic-msvc.h"
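
The hazard that motivates excluding xlc here can be seen with a minimal
sketch (hypothetical reproducer, not from the patch): gcc and clang compile
and run it fine, whereas the V13.1.2 xlc named above reportedly crashed on
exactly this one-byte form.

    #include <stdio.h>

    int
    main(void)
    {
        static volatile char flag = 0;
        char old;

        /* one-byte test-and-set: the case said to elicit SIGSEGV under xlc */
        old = __sync_lock_test_and_set(&flag, (char) 1);
        printf("old=%d new=%d\n", (int) old, (int) flag);
        __sync_lock_release(&flag);
        return 0;
    }
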
diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h
index 344b39449b..568292d1d5 100644
--- a/src/include/port/atomics/arch-ppc.h
+++ b/src/include/port/atomics/arch-ppc.h
@@ -25,5 +25,236 @@
 #define pg_write_barrier_impl()    __asm__ __volatile__ ("lwsync" : : : "memory")
 #endif
 
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+    volatile uint32 value;
+} pg_atomic_uint32;
+
+/* 64bit atomics are only supported in 64bit mode */
+#ifdef __64BIT__
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+    volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif /* __64BIT__ */
+
+/*
+ * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
+ * code generation differs at the end.  __atomic_compare_exchange_n():
+ *  100:    isync
+ *  104:    mfcr    r3
+ *  108:    rlwinm  r3,r3,3,31,31
+ *  10c:    bne     120 <.eb+0x10>
+ *  110:    clrldi  r3,r3,63
+ *  114:    addi    r1,r1,112
+ *  118:    blr
+ *  11c:    nop
+ *  120:    clrldi  r3,r3,63
+ *  124:    stw     r9,0(r4)
+ *  128:    addi    r1,r1,112
+ *  12c:    blr
+ *
+ * This:
+ *   f0:    isync
+ *   f4:    mfcr    r9
+ *   f8:    rldicl. r3,r9,35,63
+ *   fc:    bne     104 <.eb>
+ *  100:    stw     r10,0(r4)
+ *  104:    addi    r1,r1,112
+ *  108:    blr
+ *
+ * This implementation may or may not have materially different performance.
+ * It's not exploiting the fact that cr0 still holds the relevant comparison
+ * bits, set during the __asm__.  One could fix that by moving more code into
+ * the __asm__.  (That would remove the freedom to eliminate dead stores when
+ * the caller ignores "expected", but few callers do.)
+ *
+ * The cmpwi variant may be dead code.  In gcc 7.2.0,
+ * __builtin_constant_p(*expected) always reports false.
+ * __atomic_compare_exchange_n() does use cmpwi when its second argument
+ * points to a constant.  Hence, using this instead of
+ * __atomic_compare_exchange_n() nominally penalizes the generic.h
+ * pg_atomic_test_set_flag_impl().  Modern GCC will use the generic-gcc.h
+ * version, making the penalty theoretical only.
+ *
+ * Recognizing constant "newval" would be superfluous, because there's no
+ * immediate-operand version of stwcx.
+ */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+                                    uint32 *expected, uint32 newval)
+{
+    uint32      found;
+    uint32      condition_register;
+    bool        ret;
+
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+    if (__builtin_constant_p(*expected) &&
+        *expected <= PG_INT16_MAX && *expected >= PG_INT16_MIN)
+        __asm__ __volatile__(
+            "   sync                \n"
+            "   lwarx   %0,0,%5     \n"
+            "   cmpwi   %0,%3       \n"
+            "   bne     $+12        \n" /* branch to isync */
+            "   stwcx.  %4,0,%5     \n"
+            "   bne     $-16        \n" /* branch to lwarx */
+            "   isync               \n"
+            "   mfcr    %1          \n"
+:           "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:           "i"(*expected), "r"(newval), "r"(&ptr->value)
+:           "memory", "cc");
+    else
+#endif
+        __asm__ __volatile__(
+            "   sync                \n"
+            "   lwarx   %0,0,%5     \n"
+            "   cmpw    %0,%3       \n"
+            "   bne     $+12        \n" /* branch to isync */
+            "   stwcx.  %4,0,%5     \n"
+            "   bne     $-16        \n" /* branch to lwarx */
+            "   isync               \n"
+            "   mfcr    %1          \n"
+:           "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:           "r"(*expected), "r"(newval), "r"(&ptr->value)
+:           "memory", "cc");
+
+    ret = (condition_register >> 29) & 1;   /* test eq bit of cr0 */
+    if (!ret)
+        *expected = found;
+    return ret;
+}
+
+/*
+ * This mirrors gcc __sync_fetch_and_add().
+ *
+ * Like tas(), use constraint "=&b" to avoid allocating r0.
+ */
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+    uint32      _t;
+    uint32      res;
+
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+    if (__builtin_constant_p(add_) &&
+        add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+        __asm__ __volatile__(
+            "   sync                \n"
+            "   lwarx   %1,0,%4     \n"
+            "   addi    %0,%1,%3    \n"
+            "   stwcx.  %0,0,%4     \n"
+            "   bne     $-12        \n" /* branch to lwarx */
+            "   isync               \n"
+:           "=&r"(_t), "=&b"(res), "+m"(ptr->value)
+:           "i"(add_), "r"(&ptr->value)
+:           "memory", "cc");
+    else
+#endif
+        __asm__ __volatile__(
+            "   sync                \n"
+            "   lwarx   %1,0,%4     \n"
+            "   add     %0,%1,%3    \n"
+            "   stwcx.  %0,0,%4     \n"
+            "   bne     $-12        \n" /* branch to lwarx */
+            "   isync               \n"
:           "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:           "r"(add_), "r"(&ptr->value)
+:           "memory", "cc");
+
+    return res;
+}
+
+#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+                                    uint64 *expected, uint64 newval)
+{
+    uint64      found;
+    uint32      condition_register;
+    bool        ret;
+
+    /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+    if (__builtin_constant_p(*expected) &&
+        *expected <= PG_INT16_MAX && *expected >= PG_INT16_MIN)
+        __asm__ __volatile__(
+            "   sync                \n"
+            "   ldarx   %0,0,%5     \n"
+            "   cmpdi   %0,%3       \n"
+            "   bne     $+12        \n" /* branch to isync */
+            "   stdcx.  %4,0,%5     \n"
+            "   bne     $-16        \n" /* branch to ldarx */
+            "   isync               \n"
+            "   mfcr    %1          \n"
+:           "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:           "i"(*expected), "r"(newval), "r"(&ptr->value)
+:           "memory", "cc");
+    else
+#endif
+        __asm__ __volatile__(
+            "   sync                \n"
+            "   ldarx   %0,0,%5     \n"
+            "   cmpd    %0,%3       \n"
+            "   bne     $+12        \n" /* branch to isync */
+            "   stdcx.  %4,0,%5     \n"
+            "   bne     $-16        \n" /* branch to ldarx */
+            "   isync               \n"
+            "   mfcr    %1          \n"
+:           "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:           "r"(*expected), "r"(newval), "r"(&ptr->value)
+:           "memory", "cc");
+
+    ret = (condition_register >> 29) & 1;   /* test eq bit of cr0 */
+    if (!ret)
+        *expected = found;
+    return ret;
+}
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+    uint64      _t;
+    uint64      res;
+
+    /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+    if (__builtin_constant_p(add_) &&
+        add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+        __asm__ __volatile__(
+            "   sync                \n"
+            "   ldarx   %1,0,%4     \n"
+            "   addi    %0,%1,%3    \n"
+            "   stdcx.  %0,0,%4     \n"
+            "   bne     $-12        \n" /* branch to ldarx */
+            "   isync               \n"
+:           "=&r"(_t), "=&b"(res), "+m"(ptr->value)
+:           "i"(add_), "r"(&ptr->value)
+:           "memory", "cc");
+    else
+#endif
+        __asm__ __volatile__(
+            "   sync                \n"
+            "   ldarx   %1,0,%4     \n"
+            "   add     %0,%1,%3    \n"
+            "   stdcx.  %0,0,%4     \n"
+            "   bne     $-12        \n" /* branch to ldarx */
+            "   isync               \n"
+:           "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:           "r"(add_), "r"(&ptr->value)
+:           "memory", "cc");
+
+    return res;
+}
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
 /* per architecture manual doubleword accesses have single copy atomicity */
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
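
For illustration (not part of the patch), the caller-side pattern these
functions serve is a compare-exchange retry loop.  set_flag_bit is an
invented name, and the gcc builtin stands in for
pg_atomic_compare_exchange_u32_impl; both write the value actually found
into *expected on failure, which is why the loop needs no separate reload.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t
    set_flag_bit(volatile uint32_t *ptr, uint32_t bit)
    {
        uint32_t old = *ptr;    /* first guess: plain unlocked read */

        /* on failure, "old" is overwritten with the current value */
        while (!__atomic_compare_exchange_n(ptr, &old, old | bit, false,
                                            __ATOMIC_SEQ_CST,
                                            __ATOMIC_SEQ_CST))
        {
            /* recompute old | bit from the refreshed "old" and retry */
        }
        return old;             /* value before our bit was set */
    }

    int
    main(void)
    {
        static volatile uint32_t v = 0x10;

        printf("before=0x%x\n", (unsigned) set_flag_bit(&v, 0x1));
        printf("after=0x%x\n", (unsigned) v);   /* prints 0x11 */
        return 0;
    }
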
diff --git a/src/include/port/atomics/generic-xlc.h b/src/include/port/atomics/generic-xlc.h
deleted file mode 100644
index 8b5c732970..0000000000
--- a/src/include/port/atomics/generic-xlc.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * generic-xlc.h
- *    Atomic operations for IBM's CC
- *
- * Portions Copyright (c) 2013-2019, PostgreSQL Global Development Group
- *
- * NOTES:
- *
- * Documentation:
- * * Synchronization and atomic built-in functions
- *   http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.2/com.ibm.xlcpp131.aix.doc/compiler_ref/bifs_sync_atomic.html
- *
- * src/include/port/atomics/generic-xlc.h
- *
- * -------------------------------------------------------------------------
- */
-
-#if defined(HAVE_ATOMICS)
-
-#define PG_HAVE_ATOMIC_U32_SUPPORT
-typedef struct pg_atomic_uint32
-{
-    volatile uint32 value;
-} pg_atomic_uint32;
-
-
-/* 64bit atomics are only supported in 64bit mode */
-#ifdef __64BIT__
-#define PG_HAVE_ATOMIC_U64_SUPPORT
-typedef struct pg_atomic_uint64
-{
-    volatile uint64 value pg_attribute_aligned(8);
-} pg_atomic_uint64;
-
-#endif /* __64BIT__ */
-
-#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
-static inline bool
-pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
-                                    uint32 *expected, uint32 newval)
-{
-    bool        ret;
-
-    /*
-     * atomics.h specifies sequential consistency ("full barrier semantics")
-     * for this interface.  Since "lwsync" provides acquire/release
-     * consistency only, do not use it here.  GCC atomics observe the same
-     * restriction; see its rs6000_pre_atomic_barrier().
-     */
-    __asm__ __volatile__ (" sync \n" ::: "memory");
-
-    /*
-     * XXX: __compare_and_swap is defined to take signed parameters, but that
-     * shouldn't matter since we don't perform any arithmetic operations.
-     */
-    ret = __compare_and_swap((volatile int*)&ptr->value,
-                             (int *)expected, (int)newval);
-
-    /*
-     * xlc's documentation tells us:
-     * "If __compare_and_swap is used as a locking primitive, insert a call to
-     * the __isync built-in function at the start of any critical sections."
-     *
-     * The critical section begins immediately after __compare_and_swap().
-     */
-    __isync();
-
-    return ret;
-}
-
-#define PG_HAVE_ATOMIC_FETCH_ADD_U32
-static inline uint32
-pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
-{
-    uint32      _t;
-    uint32      res;
-
-    /*
-     * xlc has a no-longer-documented __fetch_and_add() intrinsic.  In xlc
-     * 12.01.0000.0000, it emits a leading "sync" and trailing "isync".  In
-     * xlc 13.01.0003.0004, it emits neither.  Hence, using the intrinsic
-     * would add redundant syncs on xlc 12.
-     */
-    __asm__ __volatile__(
-        "   sync                \n"
-        "   lwarx   %1,0,%4     \n"
-        "   add     %0,%1,%3    \n"
-        "   stwcx.  %0,0,%4     \n"
-        "   bne     $-12        \n" /* branch to lwarx */
-        "   isync               \n"
-:       "=&r"(_t), "=&r"(res), "+m"(ptr->value)
-:       "r"(add_), "r"(&ptr->value)
-:       "memory", "cc");
-
-    return res;
-}
-
-#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
-
-#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
-static inline bool
-pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
-                                    uint64 *expected, uint64 newval)
-{
-    bool        ret;
-
-    __asm__ __volatile__ (" sync \n" ::: "memory");
-
-    ret = __compare_and_swaplp((volatile long*)&ptr->value,
-                               (long *)expected, (long)newval);
-
-    __isync();
-
-    return ret;
-}
-
-#define PG_HAVE_ATOMIC_FETCH_ADD_U64
-static inline uint64
-pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
-{
-    uint64      _t;
-    uint64      res;
-
-    /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
-    __asm__ __volatile__(
-        "   sync                \n"
-        "   ldarx   %1,0,%4     \n"
-        "   add     %0,%1,%3    \n"
-        "   stdcx.  %0,0,%4     \n"
-        "   bne     $-12        \n" /* branch to ldarx */
-        "   isync               \n"
-:       "=&r"(_t), "=&r"(res), "+m"(ptr->value)
-:       "r"(add_), "r"(&ptr->value)
-:       "memory", "cc");
-
-    return res;
-}
-
-#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
-
-#endif /* defined(HAVE_ATOMICS) */
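
For comparison (illustration only, not from the patch): the semantics that
both the deleted xlc code and the new arch-ppc.h asm preserve are those of
gcc's __sync_fetch_and_add(), which the arch-ppc.h comment says the new
fetch_add mirrors -- the pre-add value comes back to the caller.

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        static volatile uint32_t counter = 40;

        /* returns the value before the addition, like the asm versions */
        uint32_t old = __sync_fetch_and_add(&counter, 2);

        printf("old=%u new=%u\n", (unsigned) old, (unsigned) counter);
        return 0;       /* prints old=40 new=42 */
    }
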
diff --git a/src/tools/pginclude/cpluspluscheck b/src/tools/pginclude/cpluspluscheck
index 843d391e68..3f315bc2b0 100755
--- a/src/tools/pginclude/cpluspluscheck
+++ b/src/tools/pginclude/cpluspluscheck
@@ -89,7 +89,6 @@ do
     test "$f" = src/include/port/atomics/generic-gcc.h && continue
     test "$f" = src/include/port/atomics/generic-msvc.h && continue
     test "$f" = src/include/port/atomics/generic-sunpro.h && continue
-    test "$f" = src/include/port/atomics/generic-xlc.h && continue
 
     # rusagestub.h is also platform-specific, and will be included
     # by utils/pg_rusage.h if necessary.
diff --git a/src/tools/pginclude/headerscheck b/src/tools/pginclude/headerscheck
index 8dbec908c1..7c0e5047dd 100755
--- a/src/tools/pginclude/headerscheck
+++ b/src/tools/pginclude/headerscheck
@@ -85,7 +85,6 @@ do
     test "$f" = src/include/port/atomics/generic-gcc.h && continue
     test "$f" = src/include/port/atomics/generic-msvc.h && continue
    test "$f" = src/include/port/atomics/generic-sunpro.h && continue
-    test "$f" = src/include/port/atomics/generic-xlc.h && continue
 
     # rusagestub.h is also platform-specific, and will be included
     # by utils/pg_rusage.h if necessary.
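
A closing worked note on the condition-register decoding in arch-ppc.h above
(standalone illustration, not part of the patch): mfcr copies the eight
4-bit CR fields into a general register with cr0 in the most significant
nibble, so the EQ flag that a successful stwcx./stdcx. sets lands at bit 29
when bits are numbered from the least significant end.  Ordinary integer
arithmetic confirms the "(condition_register >> 29) & 1" extraction:

    #include <stdio.h>

    int
    main(void)
    {
        /*
         * cr0 holds LT,GT,EQ,SO from high to low.  A successful store
         * conditional sets EQ, making the cr0 nibble 0b0010; mfcr places
         * that nibble at bits 31..28 of the destination register.
         */
        unsigned int condition_register = 0x2U << 28;

        printf("EQ = %u\n", (condition_register >> 29) & 1);  /* prints 1 */
        return 0;
    }
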