BUILTIN(__nvvm_bar0_and, "ii", "")
BUILTIN(__nvvm_bar0_or, "ii", "")
BUILTIN(__nvvm_bar_sync, "vi", "n")
+TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", "ptx60")
+TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", "ptx60")
+TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", "ptx60")
// Shuffle
TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", "ptx60")
TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", "ptx60")
+// Vote
+BUILTIN(__nvvm_vote_all, "bb", "")
+BUILTIN(__nvvm_vote_any, "bb", "")
+BUILTIN(__nvvm_vote_uni, "bb", "")
+BUILTIN(__nvvm_vote_ballot, "Uib", "")
+
+TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", "ptx60")
+TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", "ptx60")
+TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", "ptx60")
+TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", "ptx60")
+
// Membar
BUILTIN(__nvvm_membar_cta, "v", "")
#pragma pop_macro("__MAKE_SYNC_SHUFFLES")
+inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {
+ return __nvvm_bar_warp_sync(mask);
+}
+
+inline __device__ void __barrier_sync(unsigned int id) {
+ __nvvm_barrier_sync(id);
+}
+
+inline __device__ void __barrier_sync_count(unsigned int id,
+ unsigned int count) {
+ __nvvm_barrier_sync_cnt(id, count);
+}
+
+inline __device__ int __all_sync(unsigned int mask, int pred) {
+ return __nvvm_vote_sync_all(mask, pred);
+}
+
+inline __device__ int __any_sync(unsigned int mask, int pred) {
+ return __nvvm_vote_sync_any(mask, pred);
+}
+
+inline __device__ int __uni_sync(unsigned int mask, int pred) {
+ return __nvvm_vote_sync_uni(mask, pred);
+}
+
+inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
+ return __nvvm_vote_sync_ballot(mask, pred);
+}
+
+inline __device__ activemask() { return __nvvm_vote.ballot(1); }
+
#endif // __CUDA_VERSION >= 9000 && (!defined(__CUDA_ARCH__) ||
// __CUDA_ARCH__ >= 300)
#define __shared__ __attribute__((shared))
#define __constant__ __attribute__((constant))
-// CHECK-LABEL: nvvm_shfl_sync
-__device__ void nvvm_shfl_sync(unsigned mask, int i, float f, int a, int b) {
+// We have to keep all builtins that depend on particular target feature in the
+// same function, because the codegen will stop after the very first function
+// that encounters an error, so -verify will not be able to find errors in
+// subsequent functions.
+
+// CHECK-LABEL: nvvm_sync
+__device__ void nvvm_sync(unsigned mask, int i, float f, int a, int b,
+ bool pred) {
+ // CHECK: call void @llvm.nvvm.bar.warp.sync(i32
+ // expected-error@+1 {{'__nvvm_bar_warp_sync' needs target feature ptx60}}
+ __nvvm_bar_warp_sync(mask);
+ // CHECK: call void @llvm.nvvm.barrier.sync(i32
+ // expected-error@+1 {{'__nvvm_barrier_sync' needs target feature ptx60}}
+ __nvvm_barrier_sync(mask);
+ // CHECK: call void @llvm.nvvm.barrier.sync.cnt(i32
+ // expected-error@+1 {{'__nvvm_barrier_sync_cnt' needs target feature ptx60}}
+ __nvvm_barrier_sync_cnt(mask, i);
+
+ //
+ // SHFL.SYNC
+ //
// CHECK: call i32 @llvm.nvvm.shfl.sync.down.i32(i32 {{%[0-9]+}}, i32
// expected-error@+1 {{'__nvvm_shfl_sync_down_i32' needs target feature ptx60}}
__nvvm_shfl_sync_down_i32(mask, i, a, b);
// CHECK: call float @llvm.nvvm.shfl.sync.idx.f32(i32 {{%[0-9]+}}, float
// expected-error@+1 {{'__nvvm_shfl_sync_idx_f32' needs target feature ptx60}}
__nvvm_shfl_sync_idx_f32(mask, f, a, b);
+
+ //
+ // VOTE.SYNC
+ //
+
+ // CHECK: call i1 @llvm.nvvm.vote.all.sync(i32
+ // expected-error@+1 {{'__nvvm_vote_all_sync' needs target feature ptx60}}
+ __nvvm_vote_all_sync(mask, pred);
+ // CHECK: call i1 @llvm.nvvm.vote.any.sync(i32
+ // expected-error@+1 {{'__nvvm_vote_any_sync' needs target feature ptx60}}
+ __nvvm_vote_any_sync(mask, pred);
+ // CHECK: call i1 @llvm.nvvm.vote.uni.sync(i32
+ // expected-error@+1 {{'__nvvm_vote_uni_sync' needs target feature ptx60}}
+ __nvvm_vote_uni_sync(mask, pred);
+ // CHECK: call i32 @llvm.nvvm.vote.ballot.sync(i32
+ // expected-error@+1 {{'__nvvm_vote_ballot_sync' needs target feature ptx60}}
+ __nvvm_vote_ballot_sync(mask, pred);
+
// CHECK: ret void
}
__nvvm_shfl_idx_f32(f, a, b);
// CHECK: ret void
}
+
+__device__ void nvvm_vote(int pred) {
+ // CHECK: call i1 @llvm.nvvm.vote.all(i1
+ __nvvm_vote_all(pred);
+ // CHECK: call i1 @llvm.nvvm.vote.any(i1
+ __nvvm_vote_any(pred);
+ // CHECK: call i1 @llvm.nvvm.vote.uni(i1
+ __nvvm_vote_uni(pred);
+ // CHECK: call i32 @llvm.nvvm.vote.ballot(i1
+ __nvvm_vote_ballot(pred);
+ // CHECK: ret void
+}