Currently clang set kernel calling convention for CUDA/HIP after
arranging function, which causes incorrect kernel function type since
it depends on calling convention.
This patch moves setting kernel convention before arranging
function.
Differential Revision: https://reviews.llvm.org/D47733
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@334457
91177308-0d34-0410-b5e6-
96231b3b80d8
FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD);
}
+/// Set calling convention for CUDA/HIP kernel.
+static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM,
+ const FunctionDecl *FD) {
+ if (FD->hasAttr<CUDAGlobalAttr>()) {
+ const FunctionType *FT = FTy->getAs<FunctionType>();
+ CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT);
+ FTy = FT->getCanonicalTypeUnqualified();
+ }
+}
+
/// Arrange the argument and result information for a declaration or
/// definition of the given C++ non-static member function. The
/// member function must be an ordinary function, i.e. not a
assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!");
assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!");
- CanQual<FunctionProtoType> prototype = GetFormalType(MD);
+ CanQualType FT = GetFormalType(MD).getAs<Type>();
+ setCUDAKernelCallingConvention(FT, CGM, MD);
+ auto prototype = FT.getAs<FunctionProtoType>();
if (MD->isInstance()) {
// The abstract case is perfectly fine.
CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
assert(isa<FunctionType>(FTy));
+ setCUDAKernelCallingConvention(FTy, CGM, FD);
// When declaring a function without a prototype, always use a
// non-variadic type.
MaybeHandleStaticInExternC(D, Fn);
- if (D->hasAttr<CUDAGlobalAttr>())
- getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn);
maybeSetTrivialComdat(*D, *Fn);
llvm::Function *BlockInvokeFunc,
llvm::Value *BlockLiteral) const override;
bool shouldEmitStaticExternCAliases() const override;
- void setCUDAKernelCallingConvention(llvm::Function *F) const override;
+ void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
}
}
void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
- llvm::Function *F) const {
- F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+ const FunctionType *&FT) const {
+ FT = getABIInfo().getContext().adjustFunctionType(
+ FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
}
//===----------------------------------------------------------------------===//
/// as 'used', and having internal linkage.
virtual bool shouldEmitStaticExternCAliases() const { return true; }
- virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {}
+ virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
};
} // namespace CodeGen
--- /dev/null
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -fcuda-is-device \
+// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
+#include "Inputs/cuda.h"
+
+struct A {
+ int a[32];
+};
+
+// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce)
+// NVPTX: define void @_Z6kernel1A(%struct.A* byval align 4 %x)
+__global__ void kernel(A x) {
+}
+
+class Kernel {
+public:
+ // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce)
+ // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval align 4 %x)
+ static __global__ void memberKernel(A x){}
+ template<typename T> static __global__ void templateMemberKernel(T x) {}
+};
+
+
+template <typename T>
+__global__ void templateKernel(T x) {}
+
+void launch(void*);
+
+void test() {
+ Kernel K;
+ // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce)
+ // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval align 4 %x)
+ launch((void*)templateKernel<A>);
+
+ // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce)
+ // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval align 4 %x)
+ launch((void*)Kernel::templateMemberKernel<A>);
+}