[CUDA] Add conversion operators for threadIdx, blockIdx, gridDim, and blockDim to uint3 and dim3.

author Justin Lebar <jlebar@google.com>

Wed, 24 Feb 2016 21:49:33 +0000 (21:49 +0000)

committer Justin Lebar <jlebar@google.com>

Wed, 24 Feb 2016 21:49:33 +0000 (21:49 +0000)
author Justin Lebar <jlebar@google.com>
Wed, 24 Feb 2016 21:49:33 +0000 (21:49 +0000)
committer Justin Lebar <jlebar@google.com>
Wed, 24 Feb 2016 21:49:33 +0000 (21:49 +0000)
diff --git a/lib/Headers/__clang_cuda_runtime_wrapper.h b/lib/Headers/__clang_cuda_runtime_wrapper.h

index fb527dc9abc1fde5c1cee561fd4f46c03ae7d0db..8753a8ce6e464eccf6cee6c436a6b7aea5c604b2 100644 (file)
--- a/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -245,6 +245,33 @@ __device__ static inline void *malloc(size_t __size) {
  }
  } // namespace std
  
+// Out-of-line implementations from cuda_builtin_vars.h.  These need to come
+// after we've pulled in the definition of uint3 and dim3.
+
+__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
+  uint3 ret;  // Value copy of this object's x/y/z members, returned below.
+  ret.x = x;  // x/y/z are the members declared via __CUDA_DEVICE_BUILTIN in
+  ret.y = y;  // cuda_builtin_vars.h; for threadIdx they are backed by the
+  ret.z = z;  // __builtin_ptx_read_tid_{x,y,z}() builtins.
+  return ret;
+}
+
+__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
+  uint3 ret;  // Value copy of this object's x/y/z members, returned below.
+  ret.x = x;  // x/y/z are the members declared via __CUDA_DEVICE_BUILTIN in
+  ret.y = y;  // cuda_builtin_vars.h; for blockIdx they are backed by the
+  ret.z = z;  // __builtin_ptx_read_ctaid_{x,y,z}() builtins.
+  return ret;
+}
+
+__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
+  return dim3(x, y, z);  // dim3 built from the ntid_{x,y,z}-backed members.
+}
+
+__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
+  return dim3(x, y, z);  // dim3 built from the nctaid_{x,y,z}-backed members.
+}
+
  #include <__clang_cuda_cmath.h>
  
  // curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host
diff --git a/lib/Headers/cuda_builtin_vars.h b/lib/Headers/cuda_builtin_vars.h

index 901356b3d5ce1039e26fc543924845938e5081be..ec8308640360eaddfea3f3226f7bfc00485ad84b 100644 (file)
--- a/lib/Headers/cuda_builtin_vars.h
+++ b/lib/Headers/cuda_builtin_vars.h
@@ -24,10 +24,14 @@
  #ifndef __CUDA_BUILTIN_VARS_H
  #define __CUDA_BUILTIN_VARS_H
  
+// Forward declares from vector_types.h.
+struct uint3;
+struct dim3;
+
  // The file implements built-in CUDA variables using __declspec(property).
  // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
  // All read accesses of built-in variable fields get converted into calls to a
-// getter function which in turn would call appropriate builtin to fetch the
+// getter function which in turn calls the appropriate builtin to fetch the
  // value.
  //
  // Example:
@@ -63,6 +67,9 @@ struct __cuda_builtin_threadIdx_t {
    __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
    __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
    __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
+  // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+  // uint3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator uint3() const;
  private:
    __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
  };
@@ -71,6 +78,9 @@ struct __cuda_builtin_blockIdx_t {
    __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
    __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
    __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
+  // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+  // uint3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator uint3() const;
  private:
    __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
  };
@@ -79,6 +89,9 @@ struct __cuda_builtin_blockDim_t {
    __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
    __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
    __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
+  // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
+  // dim3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator dim3() const;
  private:
    __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
  };
@@ -87,6 +100,9 @@ struct __cuda_builtin_gridDim_t {
    __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
    __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
    __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
+  // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
+  // dim3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator dim3() const;
  private:
    __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
  };
author	Justin Lebar <jlebar@google.com>
	Wed, 24 Feb 2016 21:49:33 +0000 (21:49 +0000)
committer	Justin Lebar <jlebar@google.com>
	Wed, 24 Feb 2016 21:49:33 +0000 (21:49 +0000)
lib/Headers/__clang_cuda_runtime_wrapper.h		patch \| blob \| history
lib/Headers/cuda_builtin_vars.h		patch \| blob \| history