[CUDA][HIP] Set kernel calling convention before arrange function

author Yaxun Liu <Yaxun.Liu@amd.com>

Tue, 12 Jun 2018 00:16:33 +0000 (00:16 +0000)

committer Yaxun Liu <Yaxun.Liu@amd.com>

Tue, 12 Jun 2018 00:16:33 +0000 (00:16 +0000)
author Yaxun Liu <Yaxun.Liu@amd.com>
Tue, 12 Jun 2018 00:16:33 +0000 (00:16 +0000)
committer Yaxun Liu <Yaxun.Liu@amd.com>
Tue, 12 Jun 2018 00:16:33 +0000 (00:16 +0000)
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp

index 2d8339ef697010440824ab4f3608b9e89ecf9d5f..064b5fcb102ff4438703bf2e9d61ac99ace4cfd1 100644 (file)
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -255,6 +255,16 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
        FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD);
  }
  
+/// Set the target's kernel calling convention on FTy for CUDA/HIP kernels.
+static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM,
+                                           const FunctionDecl *FD) {
+  if (FD->hasAttr<CUDAGlobalAttr>()) {
+    const FunctionType *FT = FTy->getAs<FunctionType>();
+    CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT);
+    FTy = FT->getCanonicalTypeUnqualified();
+  }
+}
+
  /// Arrange the argument and result information for a declaration or
  /// definition of the given C++ non-static member function.  The
  /// member function must be an ordinary function, i.e. not a
@@ -264,7 +274,9 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
    assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!");
    assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!");
  
-  CanQual<FunctionProtoType> prototype = GetFormalType(MD);
+  CanQualType FT = GetFormalType(MD).getAs<Type>();
+  setCUDAKernelCallingConvention(FT, CGM, MD);
+  auto prototype = FT.getAs<FunctionProtoType>();
  
    if (MD->isInstance()) {
      // The abstract case is perfectly fine.
@@ -424,6 +436,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
    CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
  
    assert(isa<FunctionType>(FTy));
+  setCUDAKernelCallingConvention(FTy, CGM, FD);
  
    // When declaring a function without a prototype, always use a
    // non-variadic type.
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp

index 814eda4381b546ca179bf9f5b404fa915e08af5b..6595694ebd533e6a8c3ab92ffa033037d21ac3b2 100644 (file)
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -3671,8 +3671,6 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
  
    MaybeHandleStaticInExternC(D, Fn);
  
-  if (D->hasAttr<CUDAGlobalAttr>())
-    getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn);
  
    maybeSetTrivialComdat(*D, *Fn);
  
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp

index 3ec00553607cd116aaf0f0234230d9c2ec5ee213..b29bcce237cffd232f75ba300aa900ae398939eb 100644 (file)
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -7646,7 +7646,7 @@ public:
                              llvm::Function *BlockInvokeFunc,
                              llvm::Value *BlockLiteral) const override;
    bool shouldEmitStaticExternCAliases() const override;
-  void setCUDAKernelCallingConvention(llvm::Function *F) const override;
+  void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
  };
  }
  
@@ -7783,8 +7783,9 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  }
  
  void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
-    llvm::Function *F) const {
-  F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+    const FunctionType *&FT) const {
+  FT = getABIInfo().getContext().adjustFunctionType(
+      FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
  }
  
  //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h

index 5c19c7141243cccee8ab97cf247be1690b7153a8..b530260ea48f8df258e16fe744ee800579f9b14d 100644 (file)
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -302,7 +302,7 @@ public:
    /// as 'used', and having internal linkage.
    virtual bool shouldEmitStaticExternCAliases() const { return true; }
  
-  virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {}
+  virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
  };
  
  } // namespace CodeGen
diff --git a/test/CodeGenCUDA/kernel-args.cu b/test/CodeGenCUDA/kernel-args.cu

new file mode 100644 (file)

index 0000000..d098662
--- /dev/null
+++ b/test/CodeGenCUDA/kernel-args.cu
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:     -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -fcuda-is-device \
+// RUN:     -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
+#include "Inputs/cuda.h"
+
+struct A {
+  int a[32];
+};
+
+// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce)
+// NVPTX: define void @_Z6kernel1A(%struct.A* byval align 4 %x)
+__global__ void kernel(A x) {
+}
+
+class Kernel {
+public:
+  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce)
+  // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval align 4 %x)
+  static __global__ void memberKernel(A x){}
+  template<typename T> static __global__ void templateMemberKernel(T x) {}
+};
+
+
+template <typename T>
+__global__ void templateKernel(T x) {}
+
+void launch(void*);
+
+void test() {
+  Kernel K;
+  // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce)
+  // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval align 4 %x)
+  launch((void*)templateKernel<A>);
+
+  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce)
+  // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval align 4 %x)
+  launch((void*)Kernel::templateMemberKernel<A>);
+}
author	Yaxun Liu <Yaxun.Liu@amd.com>
	Tue, 12 Jun 2018 00:16:33 +0000 (00:16 +0000)
committer	Yaxun Liu <Yaxun.Liu@amd.com>
	Tue, 12 Jun 2018 00:16:33 +0000 (00:16 +0000)
lib/CodeGen/CGCall.cpp		patch \| blob \| history
lib/CodeGen/CodeGenModule.cpp		patch \| blob \| history
lib/CodeGen/TargetInfo.cpp		patch \| blob \| history
lib/CodeGen/TargetInfo.h		patch \| blob \| history
test/CodeGenCUDA/kernel-args.cu	[new file with mode: 0644]	patch \| blob