]> granicus.if.org Git - clang/commitdiff
NVPTX: Use the nvvm builtins to read SRegs rather than the legacy ptx ones
authorJustin Bogner <mail@justinbogner.com>
Thu, 7 Jul 2016 16:41:08 +0000 (16:41 +0000)
committerJustin Bogner <mail@justinbogner.com>
Thu, 7 Jul 2016 16:41:08 +0000 (16:41 +0000)
The ptx spellings were removed from LLVM in r274769.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274770 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/BuiltinsNVPTX.def
lib/Headers/cuda_builtin_vars.h
test/CodeGen/builtins-nvptx.c
test/CodeGenCUDA/cuda-builtin-vars.cu
test/SemaCUDA/builtins.cu

index 48aa14bbfe102232c4343780659d6b005fa527f3..acfed606d5280c11069ae6e49f126eae8834fa9a 100644 (file)
 
 // The format of this database matches clang/Basic/Builtins.def.
 
-// Builtins retained from previous PTX back-end
-BUILTIN(__builtin_ptx_read_tid_x, "i", "nc")
-BUILTIN(__builtin_ptx_read_tid_y, "i", "nc")
-BUILTIN(__builtin_ptx_read_tid_z, "i", "nc")
-BUILTIN(__builtin_ptx_read_tid_w, "i", "nc")
+// Special Registers
+
+BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc")
+
+BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc")
+
+BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc")
+
+BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
+
+BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
+
+BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc")
+
+BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc")
+BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
+
+BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
+BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
+
+BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
+BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
+BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n")
+BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
 
-BUILTIN(__builtin_ptx_read_ntid_x, "i", "nc")
-BUILTIN(__builtin_ptx_read_ntid_y, "i", "nc")
-BUILTIN(__builtin_ptx_read_ntid_z, "i", "nc")
-BUILTIN(__builtin_ptx_read_ntid_w, "i", "nc")
-
-BUILTIN(__builtin_ptx_read_ctaid_x, "i", "nc")
-BUILTIN(__builtin_ptx_read_ctaid_y, "i", "nc")
-BUILTIN(__builtin_ptx_read_ctaid_z, "i", "nc")
-BUILTIN(__builtin_ptx_read_ctaid_w, "i", "nc")
-
-BUILTIN(__builtin_ptx_read_nctaid_x, "i", "nc")
-BUILTIN(__builtin_ptx_read_nctaid_y, "i", "nc")
-BUILTIN(__builtin_ptx_read_nctaid_z, "i", "nc")
-BUILTIN(__builtin_ptx_read_nctaid_w, "i", "nc")
-
-BUILTIN(__builtin_ptx_read_laneid, "i", "nc")
-BUILTIN(__builtin_ptx_read_warpid, "i", "nc")
-BUILTIN(__builtin_ptx_read_nwarpid, "i", "nc")
-
-BUILTIN(__builtin_ptx_read_smid, "i", "nc")
-BUILTIN(__builtin_ptx_read_nsmid, "i", "nc")
-BUILTIN(__builtin_ptx_read_gridid, "i", "nc")
-
-BUILTIN(__builtin_ptx_read_lanemask_eq, "i", "nc")
-BUILTIN(__builtin_ptx_read_lanemask_le, "i", "nc")
-BUILTIN(__builtin_ptx_read_lanemask_lt, "i", "nc")
-BUILTIN(__builtin_ptx_read_lanemask_ge, "i", "nc")
-BUILTIN(__builtin_ptx_read_lanemask_gt, "i", "nc")
-
-BUILTIN(__builtin_ptx_read_clock, "i", "n")
-BUILTIN(__builtin_ptx_read_clock64, "LLi", "n")
-
-BUILTIN(__builtin_ptx_read_pm0, "i", "n")
-BUILTIN(__builtin_ptx_read_pm1, "i", "n")
-BUILTIN(__builtin_ptx_read_pm2, "i", "n")
-BUILTIN(__builtin_ptx_read_pm3, "i", "n")
-
-BUILTIN(__builtin_ptx_bar_sync, "vi", "n")
-
-
-// Builtins exposed as part of NVVM
 // MISC
 
 BUILTIN(__nvvm_clz_i, "ii", "")
@@ -396,11 +393,11 @@ BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
 
 // Sync
 
-BUILTIN(__syncthreads, "v", "")
 BUILTIN(__nvvm_bar0, "v", "")
 BUILTIN(__nvvm_bar0_popc, "ii", "")
 BUILTIN(__nvvm_bar0_and, "ii", "")
 BUILTIN(__nvvm_bar0_or, "ii", "")
+BUILTIN(__nvvm_bar_sync, "vi", "n")
 
 // Shuffle
 
index ec8308640360eaddfea3f3226f7bfc00485ad84b..6f5eb9c78d85220565649f0f173ab9ddf8af8c00 100644 (file)
@@ -37,7 +37,7 @@ struct dim3;
 // Example:
 //    int x = threadIdx.x;
 // IR output:
-//  %0 = call i32 @llvm.ptx.read.tid.x() #3
+//  %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3
 // PTX output:
 //  mov.u32     %r2, %tid.x;
 
@@ -64,9 +64,9 @@ struct dim3;
   __attribute__((device)) TypeName *operator&() const __DELETE
 
 struct __cuda_builtin_threadIdx_t {
-  __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
-  __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
-  __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
+  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());
+  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());
+  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
   // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
   // uint3).  This function is defined after we pull in vector_types.h.
   __attribute__((device)) operator uint3() const;
@@ -75,9 +75,9 @@ private:
 };
 
 struct __cuda_builtin_blockIdx_t {
-  __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
-  __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
-  __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
+  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());
+  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());
+  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
   // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
   // uint3).  This function is defined after we pull in vector_types.h.
   __attribute__((device)) operator uint3() const;
@@ -86,9 +86,9 @@ private:
 };
 
 struct __cuda_builtin_blockDim_t {
-  __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
-  __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
-  __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
+  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());
+  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());
+  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());
   // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
   // dim3).  This function is defined after we pull in vector_types.h.
   __attribute__((device)) operator dim3() const;
@@ -97,9 +97,9 @@ private:
 };
 
 struct __cuda_builtin_gridDim_t {
-  __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
-  __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
-  __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
+  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());
+  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());
+  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());
   // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
   // dim3).  This function is defined after we pull in vector_types.h.
   __attribute__((device)) operator dim3() const;
index 4607f3be2be0200495885a089af1ea0df54e70ff..2cb0ff3e37ff7e16a9bcb2b6fed1c163747f2440 100644 (file)
 
 __device__ int read_tid() {
 
-// CHECK: call i32 @llvm.ptx.read.tid.x()
-// CHECK: call i32 @llvm.ptx.read.tid.y()
-// CHECK: call i32 @llvm.ptx.read.tid.z()
-// CHECK: call i32 @llvm.ptx.read.tid.w()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
 
-  int x = __builtin_ptx_read_tid_x();
-  int y = __builtin_ptx_read_tid_y();
-  int z = __builtin_ptx_read_tid_z();
-  int w = __builtin_ptx_read_tid_w();
+  int x = __nvvm_read_ptx_sreg_tid_x();
+  int y = __nvvm_read_ptx_sreg_tid_y();
+  int z = __nvvm_read_ptx_sreg_tid_z();
+  int w = __nvvm_read_ptx_sreg_tid_w();
 
   return x + y + z + w;
 
@@ -27,15 +27,15 @@ __device__ int read_tid() {
 
 __device__ int read_ntid() {
 
-// CHECK: call i32 @llvm.ptx.read.ntid.x()
-// CHECK: call i32 @llvm.ptx.read.ntid.y()
-// CHECK: call i32 @llvm.ptx.read.ntid.z()
-// CHECK: call i32 @llvm.ptx.read.ntid.w()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
 
-  int x = __builtin_ptx_read_ntid_x();
-  int y = __builtin_ptx_read_ntid_y();
-  int z = __builtin_ptx_read_ntid_z();
-  int w = __builtin_ptx_read_ntid_w();
+  int x = __nvvm_read_ptx_sreg_ntid_x();
+  int y = __nvvm_read_ptx_sreg_ntid_y();
+  int z = __nvvm_read_ptx_sreg_ntid_z();
+  int w = __nvvm_read_ptx_sreg_ntid_w();
 
   return x + y + z + w;
 
@@ -43,15 +43,15 @@ __device__ int read_ntid() {
 
 __device__ int read_ctaid() {
 
-// CHECK: call i32 @llvm.ptx.read.ctaid.x()
-// CHECK: call i32 @llvm.ptx.read.ctaid.y()
-// CHECK: call i32 @llvm.ptx.read.ctaid.z()
-// CHECK: call i32 @llvm.ptx.read.ctaid.w()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
 
-  int x = __builtin_ptx_read_ctaid_x();
-  int y = __builtin_ptx_read_ctaid_y();
-  int z = __builtin_ptx_read_ctaid_z();
-  int w = __builtin_ptx_read_ctaid_w();
+  int x = __nvvm_read_ptx_sreg_ctaid_x();
+  int y = __nvvm_read_ptx_sreg_ctaid_y();
+  int z = __nvvm_read_ptx_sreg_ctaid_z();
+  int w = __nvvm_read_ptx_sreg_ctaid_w();
 
   return x + y + z + w;
 
@@ -59,15 +59,15 @@ __device__ int read_ctaid() {
 
 __device__ int read_nctaid() {
 
-// CHECK: call i32 @llvm.ptx.read.nctaid.x()
-// CHECK: call i32 @llvm.ptx.read.nctaid.y()
-// CHECK: call i32 @llvm.ptx.read.nctaid.z()
-// CHECK: call i32 @llvm.ptx.read.nctaid.w()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
 
-  int x = __builtin_ptx_read_nctaid_x();
-  int y = __builtin_ptx_read_nctaid_y();
-  int z = __builtin_ptx_read_nctaid_z();
-  int w = __builtin_ptx_read_nctaid_w();
+  int x = __nvvm_read_ptx_sreg_nctaid_x();
+  int y = __nvvm_read_ptx_sreg_nctaid_y();
+  int z = __nvvm_read_ptx_sreg_nctaid_z();
+  int w = __nvvm_read_ptx_sreg_nctaid_w();
 
   return x + y + z + w;
 
@@ -75,19 +75,19 @@ __device__ int read_nctaid() {
 
 __device__ int read_ids() {
 
-// CHECK: call i32 @llvm.ptx.read.laneid()
-// CHECK: call i32 @llvm.ptx.read.warpid()
-// CHECK: call i32 @llvm.ptx.read.nwarpid()
-// CHECK: call i32 @llvm.ptx.read.smid()
-// CHECK: call i32 @llvm.ptx.read.nsmid()
-// CHECK: call i32 @llvm.ptx.read.gridid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.laneid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid()
 
-  int a = __builtin_ptx_read_laneid();
-  int b = __builtin_ptx_read_warpid();
-  int c = __builtin_ptx_read_nwarpid();
-  int d = __builtin_ptx_read_smid();
-  int e = __builtin_ptx_read_nsmid();
-  int f = __builtin_ptx_read_gridid();
+  int a = __nvvm_read_ptx_sreg_laneid();
+  int b = __nvvm_read_ptx_sreg_warpid();
+  int c = __nvvm_read_ptx_sreg_nwarpid();
+  int d = __nvvm_read_ptx_sreg_smid();
+  int e = __nvvm_read_ptx_sreg_nsmid();
+  int f = __nvvm_read_ptx_sreg_gridid();
 
   return a + b + c + d + e + f;
 
@@ -95,17 +95,17 @@ __device__ int read_ids() {
 
 __device__ int read_lanemasks() {
 
-// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
-// CHECK: call i32 @llvm.ptx.read.lanemask.le()
-// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
-// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
-// CHECK: call i32 @llvm.ptx.read.lanemask.gt()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
 
-  int a = __builtin_ptx_read_lanemask_eq();
-  int b = __builtin_ptx_read_lanemask_le();
-  int c = __builtin_ptx_read_lanemask_lt();
-  int d = __builtin_ptx_read_lanemask_ge();
-  int e = __builtin_ptx_read_lanemask_gt();
+  int a = __nvvm_read_ptx_sreg_lanemask_eq();
+  int b = __nvvm_read_ptx_sreg_lanemask_le();
+  int c = __nvvm_read_ptx_sreg_lanemask_lt();
+  int d = __nvvm_read_ptx_sreg_lanemask_ge();
+  int e = __nvvm_read_ptx_sreg_lanemask_gt();
 
   return a + b + c + d + e;
 
@@ -113,26 +113,26 @@ __device__ int read_lanemasks() {
 
 __device__ long long read_clocks() {
 
-// CHECK: call i32 @llvm.ptx.read.clock()
-// CHECK: call i64 @llvm.ptx.read.clock64()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
+// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()
 
-  int a = __builtin_ptx_read_clock();
-  long long b = __builtin_ptx_read_clock64();
+  int a = __nvvm_read_ptx_sreg_clock();
+  long long b = __nvvm_read_ptx_sreg_clock64();
 
   return a + b;
 }
 
 __device__ int read_pms() {
 
-// CHECK: call i32 @llvm.ptx.read.pm0()
-// CHECK: call i32 @llvm.ptx.read.pm1()
-// CHECK: call i32 @llvm.ptx.read.pm2()
-// CHECK: call i32 @llvm.ptx.read.pm3()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm0()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm1()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm2()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm3()
 
-  int a = __builtin_ptx_read_pm0();
-  int b = __builtin_ptx_read_pm1();
-  int c = __builtin_ptx_read_pm2();
-  int d = __builtin_ptx_read_pm3();
+  int a = __nvvm_read_ptx_sreg_pm0();
+  int b = __nvvm_read_ptx_sreg_pm1();
+  int c = __nvvm_read_ptx_sreg_pm2();
+  int d = __nvvm_read_ptx_sreg_pm3();
 
   return a + b + c + d;
 
@@ -140,9 +140,9 @@ __device__ int read_pms() {
 
 __device__ void sync() {
 
-// CHECK: call void @llvm.ptx.bar.sync(i32 0)
+// CHECK: call void @llvm.nvvm.bar.sync(i32 0)
 
-  __builtin_ptx_bar_sync(0);
+  __nvvm_bar_sync(0);
 
 }
 
index 834e16d04d67edc9ac682ace75eee1f7b7aa3b2c..c2159f5af14199fdc3dc5a77d5a570874d30ca48 100644 (file)
@@ -6,21 +6,21 @@
 __attribute__((global))
 void kernel(int *out) {
   int i = 0;
-  out[i++] = threadIdx.x; // CHECK: call i32 @llvm.ptx.read.tid.x()
-  out[i++] = threadIdx.y; // CHECK: call i32 @llvm.ptx.read.tid.y()
-  out[i++] = threadIdx.z; // CHECK: call i32 @llvm.ptx.read.tid.z()
+  out[i++] = threadIdx.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  out[i++] = threadIdx.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+  out[i++] = threadIdx.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
 
-  out[i++] = blockIdx.x; // CHECK: call i32 @llvm.ptx.read.ctaid.x()
-  out[i++] = blockIdx.y; // CHECK: call i32 @llvm.ptx.read.ctaid.y()
-  out[i++] = blockIdx.z; // CHECK: call i32 @llvm.ptx.read.ctaid.z()
+  out[i++] = blockIdx.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+  out[i++] = blockIdx.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+  out[i++] = blockIdx.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
 
-  out[i++] = blockDim.x; // CHECK: call i32 @llvm.ptx.read.ntid.x()
-  out[i++] = blockDim.y; // CHECK: call i32 @llvm.ptx.read.ntid.y()
-  out[i++] = blockDim.z; // CHECK: call i32 @llvm.ptx.read.ntid.z()
+  out[i++] = blockDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+  out[i++] = blockDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+  out[i++] = blockDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
 
-  out[i++] = gridDim.x; // CHECK: call i32 @llvm.ptx.read.nctaid.x()
-  out[i++] = gridDim.y; // CHECK: call i32 @llvm.ptx.read.nctaid.y()
-  out[i++] = gridDim.z; // CHECK: call i32 @llvm.ptx.read.nctaid.z()
+  out[i++] = gridDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+  out[i++] = gridDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+  out[i++] = gridDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
 
   out[i++] = warpSize; // CHECK: store i32 32,
 
index 7e6d014c3fe61c6ec9096bc7dcfd8b8a6a80fa7e..814fda2ac7d34e2cdc85b4cba4400ffb4ac29443 100644 (file)
 
 void hf() {
   int x = __builtin_ia32_rdtsc();
-  int y = __builtin_ptx_read_tid_x(); // expected-note  {{'__builtin_ptx_read_tid_x' declared here}}
-  // expected-error@-1 {{reference to __device__ function '__builtin_ptx_read_tid_x' in __host__ function}}
+  int y = __nvvm_read_ptx_sreg_tid_x(); // expected-note  {{'__nvvm_read_ptx_sreg_tid_x' declared here}}
+  // expected-error@-1 {{reference to __device__ function '__nvvm_read_ptx_sreg_tid_x' in __host__ function}}
   x = __builtin_abs(1);
 }
 
 __attribute__((device)) void df() {
-  int x = __builtin_ptx_read_tid_x();
+  int x = __nvvm_read_ptx_sreg_tid_x();
   int y = __builtin_ia32_rdtsc(); // expected-error {{reference to __host__ function '__builtin_ia32_rdtsc' in __device__ function}}
                                   // expected-note@20 {{'__builtin_ia32_rdtsc' declared here}}
   x = __builtin_abs(1);