From e7ff59686eacf5021fb84be921116986c3828d8a Mon Sep 17 00:00:00 2001
From: Noah Misch
Date: Fri, 13 Sep 2019 19:34:30 -0700
Subject: [PATCH] For all ppc compilers, implement pg_atomic_fetch_add_ with inline asm.

This is more like how we handle s_lock.h and arch-x86.h. This does not
materially affect code generation for gcc 7.2.0 or xlc 13.1.3.

Reviewed by Tom Lane.

Discussion: https://postgr.es/m/20190831071157.GA3251746@rfd.leadboat.com
---
 configure                              |  40 +++++++++++
 configure.in                           |  20 ++++++
 src/include/pg_config.h.in             |   3 +
 src/include/port/atomics/arch-ppc.h    | 112 ++++++++++++++++++++++++++
 src/include/port/atomics/generic-xlc.h |  66 -----------------
 5 files changed, 175 insertions(+), 66 deletions(-)

diff --git a/configure b/configure
index b3c92764be..731d12c6ac 100755
--- a/configure
+++ b/configure
@@ -14593,6 +14593,46 @@ $as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
 
 $as_echo "#define HAVE_PPC_LWARX_MUTEX_HINT 1" >>confdefs.h
 
+    fi
+    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance" >&5
+$as_echo_n "checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance... " >&6; }
+if ${pgac_cv_have_i_constraint__builtin_constant_p+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+static inline int
+     addi(int ra, int si)
+     {
+         int res = 0;
+         if (__builtin_constant_p(si))
+             __asm__ __volatile__(
+                 " addi %0,%1,%2\n" : "=r"(res) : "b"(ra), "i"(si));
+         return res;
+     }
+     int test_adds(int x) { return addi(3, x) + addi(x, 5); }
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv_have_i_constraint__builtin_constant_p=yes
+else
+  pgac_cv_have_i_constraint__builtin_constant_p=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_i_constraint__builtin_constant_p" >&5
+$as_echo "$pgac_cv_have_i_constraint__builtin_constant_p" >&6; }
+    if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+
+$as_echo "#define HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P 1" >>confdefs.h
+
     fi
   ;;
 esac
diff --git a/configure.in b/configure.in
index 0d16c1a971..9d0e24f46b 100644
--- a/configure.in
+++ b/configure.in
@@ -1539,6 +1539,26 @@ case $host_cpu in
     if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
 	AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler supports PPC's LWARX mutex hint bit.])
     fi
+    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    AC_CACHE_CHECK([whether __builtin_constant_p(x) implies "i"(x) acceptance],
+                   [pgac_cv_have_i_constraint__builtin_constant_p],
+    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+    [static inline int
+     addi(int ra, int si)
+     {
+         int res = 0;
+         if (__builtin_constant_p(si))
+             __asm__ __volatile__(
+                 " addi %0,%1,%2\n" : "=r"(res) : "b"(ra), "i"(si));
+         return res;
+     }
+     int test_adds(int x) { return addi(3, x) + addi(x, 5); }], [])],
+    [pgac_cv_have_i_constraint__builtin_constant_p=yes],
+    [pgac_cv_have_i_constraint__builtin_constant_p=no])])
+    if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+      AC_DEFINE(HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P, 1,
+                [Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance.])
+    fi
   ;;
 esac
 
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index c6014e83fa..509cc92b98 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -329,6 +329,9 @@
 /* Define to 1 if you have isinf(). */
 #undef HAVE_ISINF
 
+/* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */
+#undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+
 /* Define to 1 if you have the <langinfo.h> header file. */
 #undef HAVE_LANGINFO_H
 
diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h
index 344b39449b..35d602e618 100644
--- a/src/include/port/atomics/arch-ppc.h
+++ b/src/include/port/atomics/arch-ppc.h
@@ -25,5 +25,117 @@
 #define pg_write_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
 #endif
 
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+/* 64bit atomics are only supported in 64bit mode */
+#ifdef __64BIT__
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif /* __64BIT__ */
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+/*
+ * Atomically add add_ to ptr->value and return the value it held before the
+ * addition. A lwarx/stwcx. loop retries until the conditional store
+ * succeeds; "sync" precedes the loop and "isync" follows it.
+ */
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	uint32 _t;
+	uint32 res;
+
+	/*
+	 * xlc has a no-longer-documented __fetch_and_add() intrinsic. In xlc
+	 * 12.01.0000.0000, it emits a leading "sync" and trailing "isync". In
+	 * xlc 13.01.0003.0004, it emits neither. Hence, using the intrinsic
+	 * would add redundant syncs on xlc 12.
+	 */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	/*
+	 * "addi" reads an RA field of zero as the literal value 0, not as r0.
+	 * Hence "res", its RA operand, takes constraint "b" (any general
+	 * register except r0) here. "add" has no such quirk, so "r" suffices.
+	 */
+	if (__builtin_constant_p(add_) &&
+		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			" sync \n"
+			" lwarx %1,0,%4 \n"
+			" addi %0,%1,%3 \n"
+			" stwcx. %0,0,%4 \n"
+			" bne $-12 \n"		/* branch to lwarx */
+			" isync \n"
+:			"=&r"(_t), "=&b"(res), "+m"(ptr->value)
+:			"i"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			" sync \n"
+			" lwarx %1,0,%4 \n"
+			" add %0,%1,%3 \n"
+			" stwcx. %0,0,%4 \n"
+			" bne $-12 \n"		/* branch to lwarx */
+			" isync \n"
+:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:			"r"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+
+	return res;
+}
+
+#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+/*
+ * 64-bit counterpart of pg_atomic_fetch_add_u32_impl(): atomically add add_
+ * to ptr->value and return the value it held before the addition.
+ */
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	uint64 _t;
+	uint64 res;
+
+	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(add_) &&
+		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			" sync \n"
+			" ldarx %1,0,%4 \n"
+			" addi %0,%1,%3 \n"
+			" stdcx. %0,0,%4 \n"
+			" bne $-12 \n"		/* branch to ldarx */
+			" isync \n"
+:			"=&r"(_t), "=&b"(res), "+m"(ptr->value)
+:			"i"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			" sync \n"
+			" ldarx %1,0,%4 \n"
+			" add %0,%1,%3 \n"
+			" stdcx. %0,0,%4 \n"
+			" bne $-12 \n"		/* branch to ldarx */
+			" isync \n"
+:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:			"r"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+
+	return res;
+}
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
 /* per architecture manual doubleword accesses have single copy atomicity */
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
diff --git a/src/include/port/atomics/generic-xlc.h b/src/include/port/atomics/generic-xlc.h
index 8b5c732970..8330b45495 100644
--- a/src/include/port/atomics/generic-xlc.h
+++ b/src/include/port/atomics/generic-xlc.h
@@ -18,23 +18,6 @@
 
 #if defined(HAVE_ATOMICS)
 
-#define PG_HAVE_ATOMIC_U32_SUPPORT
-typedef struct pg_atomic_uint32
-{
-	volatile uint32 value;
-} pg_atomic_uint32;
-
-
-/* 64bit atomics are only supported in 64bit mode */
-#ifdef __64BIT__
-#define PG_HAVE_ATOMIC_U64_SUPPORT
-typedef struct pg_atomic_uint64
-{
-	volatile uint64 value pg_attribute_aligned(8);
-} pg_atomic_uint64;
-
-#endif /* __64BIT__ */
-
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
 static inline bool
 pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
@@ -69,33 +52,6 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 	return ret;
 }
 
-#define PG_HAVE_ATOMIC_FETCH_ADD_U32
-static inline uint32
-pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
-{
-	uint32 _t;
-	uint32 res;
-
-	/*
-	 * xlc has a no-longer-documented __fetch_and_add() intrinsic. In xlc
-	 * 12.01.0000.0000, it emits a leading "sync" and trailing "isync". In
-	 * xlc 13.01.0003.0004, it emits neither. Hence, using the intrinsic
-	 * would add redundant syncs on xlc 12.
-	 */
-	__asm__ __volatile__(
-		" sync \n"
-		" lwarx %1,0,%4 \n"
-		" add %0,%1,%3 \n"
-		" stwcx. %0,0,%4 \n"
-		" bne $-12 \n"		/* branch to lwarx */
-		" isync \n"
-:		"=&r"(_t), "=&r"(res), "+m"(ptr->value)
-:		"r"(add_), "r"(&ptr->value)
-:		"memory", "cc");
-
-	return res;
-}
-
 #ifdef PG_HAVE_ATOMIC_U64_SUPPORT
 
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
@@ -115,28 +71,6 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 	return ret;
 }
 
-#define PG_HAVE_ATOMIC_FETCH_ADD_U64
-static inline uint64
-pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
-{
-	uint64 _t;
-	uint64 res;
-
-	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
-	__asm__ __volatile__(
-		" sync \n"
-		" ldarx %1,0,%4 \n"
-		" add %0,%1,%3 \n"
-		" stdcx. %0,0,%4 \n"
-		" bne $-12 \n"		/* branch to ldarx */
-		" isync \n"
-:		"=&r"(_t), "=&r"(res), "+m"(ptr->value)
-:		"r"(add_), "r"(&ptr->value)
-:		"memory", "cc");
-
-	return res;
-}
-
 #endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
 
 #endif /* defined(HAVE_ATOMICS) */
-- 
2.40.0