#ifndef __CLANG_CUDA_WRAPPERS_ALGORITHM
#define __CLANG_CUDA_WRAPPERS_ALGORITHM
-// This header defines __device__ overloads of std::min/max, but only if we're
-// <= C++11. In C++14, these functions are constexpr, and so are implicitly
-// __host__ __device__.
+// This header defines __device__ overloads of std::min/max.
//
-// We don't support the initializer_list overloads because
-// initializer_list::begin() and end() are not __host__ __device__ functions.
+// Ideally we'd declare these functions only if we're <= C++11. In C++14,
+// these functions are constexpr, and so are implicitly __host__ __device__.
//
-// When compiling in C++14 mode, we could force std::min/max to have different
-// implementations for host and device, by declaring the device overloads
-// before the constexpr overloads appear. We choose not to do this because
-
-// a) why write our own implementation when we can use one from the standard
-// library? and
-// b) libstdc++ is evil and declares min/max inside a header that is included
-// *before* we include <algorithm>. So we'd have to unconditionally
-// declare our __device__ overloads of min/max, but that would pollute
-// things for people who choose not to include <algorithm>.
+// However, the compiler being in C++14 mode does not imply that the standard
+// library supports C++14. There is no macro we can test to check that the
+// stdlib has constexpr std::min/max. Thus we have to unconditionally define
+// our device overloads.
+//
+// A host+device function cannot be overloaded, and a constexpr function
+// implicitly become host device if there's no explicitly host or device
+// overload preceding it. So the simple thing to do would be to declare our
+// device min/max overloads, and then #include_next <algorithm>. This way our
+// device overloads would come first, and so if we have a C++14 stdlib, its
+// min/max won't become host+device and conflict with our device overloads.
+//
+// But that also doesn't work. libstdc++ is evil and declares std::min/max in
+// an internal header that is included *before* <algorithm>. Thus by the time
+// we're inside of this file, std::min/max may already have been declared, and
+// thus we can't prevent them from becoming host+device if they're constexpr.
+//
+// Therefore we perpetrate the following hack: We mark our __device__ overloads
+// with __attribute__((enable_if(true, ""))). This causes the signature of the
+// function to change without changing anything else about it. (Except that
+// overload resolution will prefer it over the __host__ __device__ version
+// rather than considering them equally good).
#include_next <algorithm>
-#if __cplusplus <= 201103L
-
// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
#endif
template <class __T, class __Cmp>
+__attribute__((enable_if(true, "")))
inline __device__ const __T &
max(const __T &__a, const __T &__b, __Cmp __cmp) {
return __cmp(__a, __b) ? __b : __a;
}
template <class __T>
+__attribute__((enable_if(true, "")))
inline __device__ const __T &
max(const __T &__a, const __T &__b) {
return __a < __b ? __b : __a;
}
template <class __T, class __Cmp>
+__attribute__((enable_if(true, "")))
inline __device__ const __T &
min(const __T &__a, const __T &__b, __Cmp __cmp) {
return __cmp(__b, __a) ? __b : __a;
}
template <class __T>
+__attribute__((enable_if(true, "")))
inline __device__ const __T &
min(const __T &__a, const __T &__b) {
return __a < __b ? __a : __b;
} // namespace std
#endif
-#endif // __cplusplus <= 201103L
#endif // __CLANG_CUDA_WRAPPERS_ALGORITHM