]> granicus.if.org Git - clang/commitdiff
[NVPTX] Implemented bar.warp.sync, barrier.sync, and vote{.sync} instructions/intrins...
authorArtem Belevich <tra@google.com>
Thu, 21 Sep 2017 18:44:49 +0000 (18:44 +0000)
committerArtem Belevich <tra@google.com>
Thu, 21 Sep 2017 18:44:49 +0000 (18:44 +0000)
Differential Revision: https://reviews.llvm.org/D38148

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@313898 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/BuiltinsNVPTX.def
lib/Headers/__clang_cuda_intrinsics.h
test/CodeGen/builtins-nvptx-ptx60.cu
test/CodeGen/builtins-nvptx.c

index 0cd4195b55324738e87c121ce97fe7dcb029d3bb..4cd839323d18b5519dd4a650a3f1d3871c801c7b 100644 (file)
@@ -378,6 +378,9 @@ BUILTIN(__nvvm_bar0_popc, "ii", "")
 BUILTIN(__nvvm_bar0_and, "ii", "")
 BUILTIN(__nvvm_bar0_or, "ii", "")
 BUILTIN(__nvvm_bar_sync, "vi", "n")
+TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", "ptx60")
+TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", "ptx60")
+TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", "ptx60")
 
 // Shuffle
 
@@ -399,6 +402,17 @@ TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", "ptx60")
 TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", "ptx60")
 TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", "ptx60")
 
+// Vote
+BUILTIN(__nvvm_vote_all, "bb", "")
+BUILTIN(__nvvm_vote_any, "bb", "")
+BUILTIN(__nvvm_vote_uni, "bb", "")
+BUILTIN(__nvvm_vote_ballot, "Uib", "")
+
+TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", "ptx60")
+TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", "ptx60")
+TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", "ptx60")
+TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", "ptx60")
+
 // Membar
 
 BUILTIN(__nvvm_membar_cta, "v", "")
index c191a530594476b26671b92ed1a42e9885e04e6e..52dfe617aaf98746176bb2771560ef612ac580f2 100644 (file)
@@ -157,6 +157,37 @@ __MAKE_SYNC_SHUFFLES(__shfl_sync_xor, __nvvm_shfl_sync_bfly_i32,
 
 #pragma pop_macro("__MAKE_SYNC_SHUFFLES")
 
+inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {
+  return __nvvm_bar_warp_sync(mask);
+}
+
+inline __device__ void __barrier_sync(unsigned int id) {
+  __nvvm_barrier_sync(id);
+}
+
+inline __device__ void __barrier_sync_count(unsigned int id,
+                                            unsigned int count) {
+  __nvvm_barrier_sync_cnt(id, count);
+}
+
+inline __device__ int __all_sync(unsigned int mask, int pred) {
+  return __nvvm_vote_sync_all(mask, pred);
+}
+
+inline __device__ int __any_sync(unsigned int mask, int pred) {
+  return __nvvm_vote_sync_any(mask, pred);
+}
+
+inline __device__ int __uni_sync(unsigned int mask, int pred) {
+  return __nvvm_vote_sync_uni(mask, pred);
+}
+
+inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
+  return __nvvm_vote_sync_ballot(mask, pred);
+}
+
+inline __device__ activemask() { return __nvvm_vote.ballot(1); }
+
 #endif // __CUDA_VERSION >= 9000 && (!defined(__CUDA_ARCH__) ||
        // __CUDA_ARCH__ >= 300)
 
index e06c84c150c347270c57ef2ceebc3a1b6d208179..5bd9ca300973bf83eb8e84bf3d05772b8b48820d 100644 (file)
 #define __shared__ __attribute__((shared))
 #define __constant__ __attribute__((constant))
 
-// CHECK-LABEL: nvvm_shfl_sync
-__device__ void nvvm_shfl_sync(unsigned mask, int i, float f, int a, int b) {
+// We have to keep all builtins that depend on particular target feature in the
+// same function, because the codegen will stop after the very first function
+// that encounters an error, so -verify will not be able to find errors in
+// subsequent functions.
+
+// CHECK-LABEL: nvvm_sync
+__device__ void nvvm_sync(unsigned mask, int i, float f, int a, int b,
+                          bool pred) {
+  // CHECK: call void @llvm.nvvm.bar.warp.sync(i32
+  // expected-error@+1 {{'__nvvm_bar_warp_sync' needs target feature ptx60}}
+  __nvvm_bar_warp_sync(mask);
+  // CHECK: call void @llvm.nvvm.barrier.sync(i32
+  // expected-error@+1 {{'__nvvm_barrier_sync' needs target feature ptx60}}
+  __nvvm_barrier_sync(mask);
+  // CHECK: call void @llvm.nvvm.barrier.sync.cnt(i32
+  // expected-error@+1 {{'__nvvm_barrier_sync_cnt' needs target feature ptx60}}
+  __nvvm_barrier_sync_cnt(mask, i);
+
+  //
+  // SHFL.SYNC
+  //
   // CHECK: call i32 @llvm.nvvm.shfl.sync.down.i32(i32 {{%[0-9]+}}, i32
   // expected-error@+1 {{'__nvvm_shfl_sync_down_i32' needs target feature ptx60}}
   __nvvm_shfl_sync_down_i32(mask, i, a, b);
@@ -36,5 +55,23 @@ __device__ void nvvm_shfl_sync(unsigned mask, int i, float f, int a, int b) {
   // CHECK: call float @llvm.nvvm.shfl.sync.idx.f32(i32 {{%[0-9]+}}, float
   // expected-error@+1 {{'__nvvm_shfl_sync_idx_f32' needs target feature ptx60}}
   __nvvm_shfl_sync_idx_f32(mask, f, a, b);
+
+  //
+  // VOTE.SYNC
+  //
+
+  // CHECK: call i1 @llvm.nvvm.vote.all.sync(i32
+  // expected-error@+1 {{'__nvvm_vote_all_sync' needs target feature ptx60}}
+  __nvvm_vote_all_sync(mask, pred);
+  // CHECK: call i1 @llvm.nvvm.vote.any.sync(i32
+  // expected-error@+1 {{'__nvvm_vote_any_sync' needs target feature ptx60}}
+  __nvvm_vote_any_sync(mask, pred);
+  // CHECK: call i1 @llvm.nvvm.vote.uni.sync(i32
+  // expected-error@+1 {{'__nvvm_vote_uni_sync' needs target feature ptx60}}
+  __nvvm_vote_uni_sync(mask, pred);
+  // CHECK: call i32 @llvm.nvvm.vote.ballot.sync(i32
+  // expected-error@+1 {{'__nvvm_vote_ballot_sync' needs target feature ptx60}}
+  __nvvm_vote_ballot_sync(mask, pred);
+
   // CHECK: ret void
 }
index c97b549cbe006bfe7b66e278e85ebf6a20afb5b2..89a982377ad48194835ca89ce827a18f47b94a91 100644 (file)
@@ -657,3 +657,15 @@ __device__ void nvvm_shfl(int i, float f, int a, int b) {
   __nvvm_shfl_idx_f32(f, a, b);
   // CHECK: ret void
 }
+
+__device__ void nvvm_vote(int pred) {
+  // CHECK: call i1 @llvm.nvvm.vote.all(i1
+  __nvvm_vote_all(pred);
+  // CHECK: call i1 @llvm.nvvm.vote.any(i1
+  __nvvm_vote_any(pred);
+  // CHECK: call i1 @llvm.nvvm.vote.uni(i1
+  __nvvm_vote_uni(pred);
+  // CHECK: call i32 @llvm.nvvm.vote.ballot(i1
+  __nvvm_vote_ballot(pred);
+  // CHECK: ret void
+}