From e8fdbd58fe564a29977f4331cd26f9697d76fc40 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Fri, 7 Apr 2017 14:44:47 -0700
Subject: [PATCH] Improve 64bit atomics support.

When adding atomics back in b64d92f1a, I added 64bit support as
optional; there wasn't yet a direct user in sight.  That turned out to
be a bit short-sighted; it'd already have been useful a number of times.

Add a fallback implementation of 64bit atomics, just like the one we
have for 32bit atomics.
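
With the fallback in place, callers can use the 64bit API
unconditionally. A minimal usage sketch (illustration only, not part
of this patch):

    pg_atomic_uint64 counter;

    pg_atomic_init_u64(&counter, 0);
    pg_atomic_fetch_add_u64(&counter, 1);
    if (pg_atomic_read_u64(&counter) != 1)
    	elog(ERROR, "unexpected counter value");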

Additionally, optimize 64bit reads and writes on a number of platforms
where aligned 8 byte accesses are atomic, implementing them as plain
loads and stores. Whether a platform provides that guarantee can now be
tested with PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY.
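
As a sketch of the intended use of that macro (hypothetical shared
struct fields, not part of this patch), a reader can skip locking
where the platform guarantees untorn 8 byte loads:

    uint64		val;

    #ifdef PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
    	/* an aligned 8 byte load can't be torn on this platform */
    	val = shared->position;
    #else
    	/* otherwise a lock is needed to prevent a torn read */
    	SpinLockAcquire(&shared->mutex);
    	val = shared->position;
    	SpinLockRelease(&shared->mutex);
    #endif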

Author: Andres Freund
Reviewed-By: Amit Kapila
Discussion: https://postgr.es/m/20160330230914.GH13305@awork2.anarazel.de
---
 src/backend/port/atomics.c           | 65 +++++++++++++++++++++++++++-
 src/include/port/atomics.h           | 13 +++---
 src/include/port/atomics/arch-ia64.h |  3 ++
 src/include/port/atomics/arch-ppc.h  |  3 ++
 src/include/port/atomics/arch-x86.h  | 10 +++++
 src/include/port/atomics/fallback.h  | 33 ++++++++++++++
 src/include/port/atomics/generic.h   | 35 +++++++++++++--
 src/test/regress/regress.c           |  4 --
 8 files changed, 149 insertions(+), 17 deletions(-)

diff --git a/src/backend/port/atomics.c b/src/backend/port/atomics.c
index 756a2ef997..231d847de7 100644
--- a/src/backend/port/atomics.c
+++ b/src/backend/port/atomics.c
@@ -89,7 +89,7 @@ void
 pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
 {
 	StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t),
-					 "size mismatch of atomic_flag vs slock_t");
+					 "size mismatch of atomic_uint32 vs slock_t");
 
 	/*
 	 * If we're using semaphore based atomic flags, be careful about nested
@@ -157,3 +157,66 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 }
 
 #endif   /* PG_HAVE_ATOMIC_U32_SIMULATION */
+
+
+#ifdef PG_HAVE_ATOMIC_U64_SIMULATION
+
+void
+pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_)
+{
+	StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t),
+					 "size mismatch of atomic_uint64 vs slock_t");
+
+	/*
+	 * If we're using semaphore based atomic flags, be careful about nested
+	 * usage of atomics while a spinlock is held.
+	 */
+#ifndef HAVE_SPINLOCKS
+	s_init_lock_sema((slock_t *) &ptr->sema, true);
+#else
+	SpinLockInit((slock_t *) &ptr->sema);
+#endif
+	ptr->value = val_;
+}
+
+bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool		ret;
+
+	/*
+	 * Do atomic op under a spinlock. It might look like we could just skip
+	 * the cmpxchg if the lock isn't available, but that'd just emulate a
+	 * 'weak' compare and swap. I.e. one that allows spurious failures. Since
+	 * several algorithms rely on a strong variant and that is efficiently
+	 * implementable on most major architectures let's emulate it here as
+	 * well.
+	 */
+	SpinLockAcquire((slock_t *) &ptr->sema);
+
+	/* perform compare/exchange logic */
+	ret = ptr->value == *expected;
+	*expected = ptr->value;
+	if (ret)
+		ptr->value = newval;
+
+	/* and release lock */
+	SpinLockRelease((slock_t *) &ptr->sema);
+
+	return ret;
+}
+
+uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	uint64		oldval;
+
+	SpinLockAcquire((slock_t *) &ptr->sema);
+	oldval = ptr->value;
+	ptr->value += add_;
+	SpinLockRelease((slock_t *) &ptr->sema);
+	return oldval;
+}
+
+#endif   /* PG_HAVE_ATOMIC_U64_SIMULATION */
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index 2e2ec27639..5f3d266298 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -12,13 +12,14 @@
  * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier()
  * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32()
  * * pg_atomic_test_set_flag(), pg_atomic_init_flag(), pg_atomic_clear_flag()
+ * * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY should be defined if appropriate.
  *
  * There exist generic, hardware independent, implementations for several
  * compilers which might be sufficient, although possibly not optimal, for a
  * new platform. If no such generic implementation is available spinlocks (or
  * even OS provided semaphores) will be used to implement the API.
  *
- * Implement the _u64 variants if and only if your platform can use them
+ * Implement _u64 atomics if and only if your platform can use them
  * efficiently (and obviously correctly).
  *
  * Use higher level functionality (lwlocks, spinlocks, heavyweight locks)
@@ -110,9 +111,9 @@
 
 /*
  * Provide a full fallback of the pg_*_barrier(), pg_atomic**_flag and
- * pg_atomic_*_u32 APIs for platforms without sufficient spinlock and/or
- * atomics support. In the case of spinlock backed atomics the emulation is
- * expected to be efficient, although less so than native atomics support.
+ * pg_atomic_* APIs for platforms without sufficient spinlock and/or atomics
+ * support. In the case of spinlock backed atomics the emulation is expected
+ * to be efficient, although less so than native atomics support.
  */
 #include "port/atomics/fallback.h"
 
@@ -421,8 +422,6 @@ pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
  * documentation.
  * ----
  */
-#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
-
 static inline void
 pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
 {
@@ -506,8 +505,6 @@ pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
 	return pg_atomic_sub_fetch_u64_impl(ptr, sub_);
 }
 
-#endif   /* PG_HAVE_64_BIT_ATOMICS */
-
 #undef INSIDE_ATOMICS_H
 
 #endif   /* ATOMICS_H */
diff --git a/src/include/port/atomics/arch-ia64.h b/src/include/port/atomics/arch-ia64.h
index 61224d63d5..3dc4b298e1 100644
--- a/src/include/port/atomics/arch-ia64.h
+++ b/src/include/port/atomics/arch-ia64.h
@@ -24,3 +24,6 @@
 #elif defined(__hpux)
 #	define pg_memory_barrier_impl()		_Asm_mf()
 #endif
+
+/* per the architecture manual, doubleword accesses have single copy atomicity */
+#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h
index ed1cd9d1b9..ed30468398 100644
--- a/src/include/port/atomics/arch-ppc.h
+++ b/src/include/port/atomics/arch-ppc.h
@@ -24,3 +24,6 @@
 #define pg_read_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
 #define pg_write_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
 #endif
+
+/* per the architecture manual, doubleword accesses have single copy atomicity */
+#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index 70b991f725..363d9680cb 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -239,4 +239,14 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 
 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
 
+/*
+ * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms
+ * since at least the 586, as well as on all x86-64 cpus.
+ */
+#if defined(__i586__) || defined(__i686__) || /* gcc i586+ */  \
+	(defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
+	defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
+#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
+#endif /* 8 byte single-copy atomicity */
+
 #endif /* HAVE_ATOMICS */
diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h
index 65d3d8c658..4e07add0a4 100644
--- a/src/include/port/atomics/fallback.h
+++ b/src/include/port/atomics/fallback.h
@@ -102,6 +102,24 @@ typedef struct pg_atomic_uint32
 
 #endif /* PG_HAVE_ATOMIC_U32_SUPPORT */
 
+#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT)
+
+#define PG_HAVE_ATOMIC_U64_SIMULATION
+
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+	/* Check pg_atomic_flag's definition above for an explanation */
+#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC, GCC and HP compilers */
+	int			sema[4];
+#else
+	int			sema;
+#endif
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
 #ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION
 
 #define PG_HAVE_ATOMIC_INIT_FLAG
@@ -144,3 +162,18 @@ extern bool pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 extern uint32 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_);
 
 #endif /* PG_HAVE_ATOMIC_U32_SIMULATION */
+
+
+#ifdef PG_HAVE_ATOMIC_U64_SIMULATION
+
+#define PG_HAVE_ATOMIC_INIT_U64
+extern void pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_);
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+extern bool pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+												uint64 *expected, uint64 newval);
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+extern uint64 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_);
+
+#endif /* PG_HAVE_ATOMIC_U64_SIMULATION */
diff --git a/src/include/port/atomics/generic.h b/src/include/port/atomics/generic.h
index a5b29d83f7..c0942482fc 100644
--- a/src/include/port/atomics/generic.h
+++ b/src/include/port/atomics/generic.h
@@ -255,8 +255,6 @@ pg_atomic_sub_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_)
 }
 #endif
 
-#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
-
 #if !defined(PG_HAVE_ATOMIC_EXCHANGE_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64)
 #define PG_HAVE_ATOMIC_EXCHANGE_U64
 static inline uint64
@@ -273,6 +271,37 @@ pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 xchg_)
 }
 #endif
 
+/*
+ * On platforms where aligned 8 byte accesses have single copy atomicity,
+ * 64bit reads and writes are atomic by themselves and can be implemented
+ * as plain loads and stores.  The spinlock based fallback can't guarantee
+ * the required alignment, so exclude it here.
+ */
+#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \
+	!defined(PG_HAVE_ATOMIC_U64_SIMULATION)
+
+#ifndef PG_HAVE_ATOMIC_READ_U64
+#define PG_HAVE_ATOMIC_READ_U64
+static inline uint64
+pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr)
+{
+	AssertPointerAlignment(ptr, 8);
+	return *(&ptr->value);
+}
+#endif
+
+#ifndef PG_HAVE_ATOMIC_WRITE_U64
+#define PG_HAVE_ATOMIC_WRITE_U64
+static inline void
+pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+	AssertPointerAlignment(ptr, 8);
+	ptr->value = val;
+}
+#endif
+
+#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */
+
 #ifndef PG_HAVE_ATOMIC_WRITE_U64
 #define PG_HAVE_ATOMIC_WRITE_U64
 static inline void
@@ -388,5 +417,3 @@ pg_atomic_sub_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_)
 	return pg_atomic_fetch_sub_u64_impl(ptr, sub_) - sub_;
 }
 #endif
-
-#endif /* PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 */
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index d7fb8498d8..80d0929df3 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -997,7 +997,6 @@ test_atomic_uint32(void)
 		elog(ERROR, "pg_atomic_fetch_and_u32() #3 wrong");
 }
 
-#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
 static void
 test_atomic_uint64(void)
 {
@@ -1073,7 +1072,6 @@ test_atomic_uint64(void)
 	if (pg_atomic_fetch_and_u64(&var, ~0) != 0)
 		elog(ERROR, "pg_atomic_fetch_and_u64() #3 wrong");
 }
-#endif   /* PG_HAVE_ATOMIC_U64_SUPPORT */
 
 
 PG_FUNCTION_INFO_V1(test_atomic_ops);
@@ -1096,9 +1094,7 @@ test_atomic_ops(PG_FUNCTION_ARGS)
 
 	test_atomic_uint32();
 
-#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
 	test_atomic_uint64();
-#endif
 
 	PG_RETURN_BOOL(true);
 }
-- 
2.50.1