[CUDA] Make std::min/max work when compiling in C++14 mode with a C++11 stdlib.

author Justin Lebar <jlebar@google.com>

Thu, 17 May 2018 16:12:42 +0000 (16:12 +0000)

committer Justin Lebar <jlebar@google.com>

Thu, 17 May 2018 16:12:42 +0000 (16:12 +0000)
author Justin Lebar <jlebar@google.com>
Thu, 17 May 2018 16:12:42 +0000 (16:12 +0000)
committer Justin Lebar <jlebar@google.com>
Thu, 17 May 2018 16:12:42 +0000 (16:12 +0000)
diff --git a/lib/Headers/cuda_wrappers/algorithm b/lib/Headers/cuda_wrappers/algorithm

index cedd70762c486c61d6ce5506f4e3b90cab8e5dbf..8ee84f7fb02c822ef6035e8192a7d5b3d34b52a0 100644 (file)
--- a/lib/Headers/cuda_wrappers/algorithm
+++ b/lib/Headers/cuda_wrappers/algorithm
@@ -24,28 +24,36 @@
  #ifndef __CLANG_CUDA_WRAPPERS_ALGORITHM
  #define __CLANG_CUDA_WRAPPERS_ALGORITHM
  
-// This header defines __device__ overloads of std::min/max, but only if we're
-// <= C++11.  In C++14, these functions are constexpr, and so are implicitly
-// __host__ __device__.
+// This header defines __device__ overloads of std::min/max.
  //
-// We don't support the initializer_list overloads because
-// initializer_list::begin() and end() are not __host__ __device__ functions.
+// Ideally we'd declare these functions only if we're <= C++11.  In C++14,
+// these functions are constexpr, and so are implicitly __host__ __device__.
  //
-// When compiling in C++14 mode, we could force std::min/max to have different
-// implementations for host and device, by declaring the device overloads
-// before the constexpr overloads appear.  We choose not to do this because
-
-//  a) why write our own implementation when we can use one from the standard
-//     library? and
-//  b) libstdc++ is evil and declares min/max inside a header that is included
-//     *before* we include <algorithm>.  So we'd have to unconditionally
-//     declare our __device__ overloads of min/max, but that would pollute
-//     things for people who choose not to include <algorithm>.
+// However, the compiler being in C++14 mode does not imply that the standard
+// library supports C++14.  There is no macro we can test to check that the
+// stdlib has constexpr std::min/max.  Thus we have to unconditionally define
+// our device overloads.
+//
+// A host+device function cannot be overloaded, and a constexpr function
+// implicitly become host device if there's no explicitly host or device
+// overload preceding it.  So the simple thing to do would be to declare our
+// device min/max overloads, and then #include_next <algorithm>.  This way our
+// device overloads would come first, and so if we have a C++14 stdlib, its
+// min/max won't become host+device and conflict with our device overloads.
+//
+// But that also doesn't work.  libstdc++ is evil and declares std::min/max in
+// an internal header that is included *before* <algorithm>.  Thus by the time
+// we're inside of this file, std::min/max may already have been declared, and
+// thus we can't prevent them from becoming host+device if they're constexpr.
+//
+// Therefore we perpetrate the following hack: We mark our __device__ overloads
+// with __attribute__((enable_if(true, ""))).  This causes the signature of the
+// function to change without changing anything else about it.  (Except that
+// overload resolution will prefer it over the __host__ __device__ version
+// rather than considering them equally good).
  
  #include_next <algorithm>
  
-#if __cplusplus <= 201103L
-
  // We need to define these overloads in exactly the namespace our standard
  // library uses (including the right inline namespace), otherwise they won't be
  // picked up by other functions in the standard library (e.g. functions in
@@ -60,24 +68,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  #endif
  
  template <class __T, class __Cmp>
+__attribute__((enable_if(true, "")))
  inline __device__ const __T &
  max(const __T &__a, const __T &__b, __Cmp __cmp) {
    return __cmp(__a, __b) ? __b : __a;
  }
  
  template <class __T>
+__attribute__((enable_if(true, "")))
  inline __device__ const __T &
  max(const __T &__a, const __T &__b) {
    return __a < __b ? __b : __a;
  }
  
  template <class __T, class __Cmp>
+__attribute__((enable_if(true, "")))
  inline __device__ const __T &
  min(const __T &__a, const __T &__b, __Cmp __cmp) {
    return __cmp(__b, __a) ? __b : __a;
  }
  
  template <class __T>
+__attribute__((enable_if(true, "")))
  inline __device__ const __T &
  min(const __T &__a, const __T &__b) {
    return __a < __b ? __a : __b;
@@ -92,5 +104,4 @@ _GLIBCXX_END_NAMESPACE_VERSION
  } // namespace std
  #endif
  
-#endif // __cplusplus <= 201103L
  #endif // __CLANG_CUDA_WRAPPERS_ALGORITHM
author	Justin Lebar <jlebar@google.com>
	Thu, 17 May 2018 16:12:42 +0000 (16:12 +0000)
committer	Justin Lebar <jlebar@google.com>
	Thu, 17 May 2018 16:12:42 +0000 (16:12 +0000)