}
} // namespace std
+// Out-of-line implementations from cuda_builtin_vars.h. These need to come
+// after we've pulled in the definition of uint3 and dim3.
+
+__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
+ uint3 ret;
+ ret.x = x;
+ ret.y = y;
+ ret.z = z;
+ return ret;
+}
+
+__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
+ uint3 ret;
+ ret.x = x;
+ ret.y = y;
+ ret.z = z;
+ return ret;
+}
+
+__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
+ return dim3(x, y, z);
+}
+
+__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
+ return dim3(x, y, z);
+}
+
#include <__clang_cuda_cmath.h>
// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host
#ifndef __CUDA_BUILTIN_VARS_H
#define __CUDA_BUILTIN_VARS_H
+// Forward declares from vector_types.h.
+struct uint3;
+struct dim3;
+
// The file implements built-in CUDA variables using __declspec(property).
// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
// All read accesses of built-in variable fields get converted into calls to a
-// getter function which in turn would call appropriate builtin to fetch the
+// getter function which in turn calls the appropriate builtin to fetch the
// value.
//
// Example:
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
+ // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+ // uint3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
};
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
+ // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+ // uint3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
};
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
+ // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
+ // dim3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator dim3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
};
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
+ // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
+ // dim3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator dim3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
};