From cf25b2a2f9a4306637a985cee86a9a987c0b6a7c Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 31 Aug 2015 16:30:12 -0400
Subject: [PATCH] Allow icc to use the same atomics infrastructure as gcc.

The atomics headers were written under the impression that icc doesn't
handle gcc-style asm blocks, but this is demonstrably false on x86_[64],
because s_lock.h has done it that way for more than a decade.  (The jury is
still out on whether this also works on ia64, so I'm leaving ia64-related
code alone for the moment.)  Treat gcc and icc the same in these headers.
This is less code and it should improve the results for icc, because we
hadn't gotten around to providing icc-specific implementations for most
of the atomics.
---
 src/include/port/atomics/arch-x86.h    | 39 ++++++++++----------------
 src/include/port/atomics/generic-gcc.h |  7 +----
 2 files changed, 16 insertions(+), 30 deletions(-)

diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index 3f65accae8..2d713f2ff7 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -5,7 +5,7 @@
  *
  * Note that we actually require a 486 upwards because the 386 doesn't have
  * support for xadd and cmpxchg. Given that the 386 isn't supported anywhere
- * anymore that's not much of restriction luckily.
+ * anymore that's not much of a restriction luckily.
  *
  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -28,18 +28,18 @@
  * do those things, a compiler barrier should be enough.
  *
  * "lock; addl" has worked for longer than "mfence". It's also rumored to be
- * faster in many scenarios
+ * faster in many scenarios.
  */
 
-#if defined(__INTEL_COMPILER)
-#define pg_memory_barrier_impl()		_mm_mfence()
-#elif defined(__GNUC__) && (defined(__i386__) || defined(__i386))
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+#if defined(__i386__) || defined(__i386)
 #define pg_memory_barrier_impl()		\
 	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
-#elif defined(__GNUC__) && defined(__x86_64__)
+#elif defined(__x86_64__)
 #define pg_memory_barrier_impl()		\
 	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
 #endif
+#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
 
 #define pg_read_barrier_impl()		pg_compiler_barrier_impl()
 #define pg_write_barrier_impl()		pg_compiler_barrier_impl()
@@ -51,7 +51,7 @@
  */
 #if defined(HAVE_ATOMICS)
 
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
 
 #define PG_HAVE_ATOMIC_FLAG_SUPPORT
 typedef struct pg_atomic_flag
@@ -67,7 +67,7 @@ typedef struct pg_atomic_uint32
 
 /*
  * It's too complicated to write inline asm for 64bit types on 32bit and the
- * 468 can't do it.
+ * 486 can't do it anyway.
  */
 #ifdef __x86_64__
 #define PG_HAVE_ATOMIC_U64_SUPPORT
@@ -76,11 +76,11 @@ typedef struct pg_atomic_uint64
 	/* alignment guaranteed due to being on a 64bit platform */
 	volatile uint64 value;
 } pg_atomic_uint64;
-#endif
+#endif	/* __x86_64__ */
 
-#endif /* defined(HAVE_ATOMICS) */
+#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
 
-#endif /* defined(__GNUC__) && !defined(__INTEL_COMPILER) */
+#endif /* defined(HAVE_ATOMICS) */
 
 #if !defined(PG_HAVE_SPIN_DELAY)
 /*
@@ -106,20 +106,12 @@ typedef struct pg_atomic_uint64
  *     de-pipelines the spin-wait loop to prevent it from
  *     consuming execution resources excessively.
  */
-#if defined(__INTEL_COMPILER)
-#define PG_HAVE_SPIN_DELAY
-static inline
-pg_spin_delay_impl(void)
-{
-	_mm_pause();
-}
-#elif defined(__GNUC__)
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
 #define PG_HAVE_SPIN_DELAY
 static __inline__ void
 pg_spin_delay_impl(void)
 {
-	__asm__ __volatile__(
-		" rep; nop			\n");
+	__asm__ __volatile__(" rep; nop			\n");
 }
 #elif defined(WIN32_ONLY_COMPILER) && defined(__x86_64__)
 #define PG_HAVE_SPIN_DELAY
@@ -142,8 +134,7 @@ pg_spin_delay_impl(void)
 
 #if defined(HAVE_ATOMICS)
 
-/* inline assembly implementation for gcc */
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
 
 #define PG_HAVE_ATOMIC_TEST_SET_FLAG
 static inline bool
@@ -246,6 +237,6 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 
 #endif /* __x86_64__ */
 
-#endif /* defined(__GNUC__) && !defined(__INTEL_COMPILER) */
+#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
 
 #endif /* HAVE_ATOMICS */
diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h
index 306c38f6f7..8a50485cda 100644
--- a/src/include/port/atomics/generic-gcc.h
+++ b/src/include/port/atomics/generic-gcc.h
@@ -25,14 +25,9 @@
 #endif
 
 /*
- * icc provides all the same intrinsics but doesn't understand gcc's inline asm
+ * An empty asm block should be a sufficient compiler barrier.
  */
-#if defined(__INTEL_COMPILER)
-/* NB: Yes, __memory_barrier() is actually just a compiler barrier */
-#define pg_compiler_barrier_impl()	__memory_barrier()
-#else
 #define pg_compiler_barrier_impl()	__asm__ __volatile__("" ::: "memory")
-#endif
 
 /*
  * If we're on GCC 4.1.0 or higher, we should be able to get a memory barrier
-- 
2.50.0