Set calling convention for CUDA kernel

author Yaxun Liu <Yaxun.Liu@amd.com>

Thu, 29 Mar 2018 15:02:08 +0000 (15:02 +0000)

committer Yaxun Liu <Yaxun.Liu@amd.com>

Thu, 29 Mar 2018 15:02:08 +0000 (15:02 +0000)
author Yaxun Liu <Yaxun.Liu@amd.com>
Thu, 29 Mar 2018 15:02:08 +0000 (15:02 +0000)
committer Yaxun Liu <Yaxun.Liu@amd.com>
Thu, 29 Mar 2018 15:02:08 +0000 (15:02 +0000)
diff --git a/include/clang/Basic/Specifiers.h b/include/clang/Basic/Specifiers.h

index 377534baab06ba1994e6a3d61a4f82e84a0327c6..c5d2e26ecf26530adb2639ce24dc199e841c8fdb 100644 (file)
--- a/include/clang/Basic/Specifiers.h
+++ b/include/clang/Basic/Specifiers.h
@@ -231,23 +231,24 @@ namespace clang {
  
    /// \brief CallingConv - Specifies the calling convention that a function uses.
    enum CallingConv {
-    CC_C,           // __attribute__((cdecl))
-    CC_X86StdCall,  // __attribute__((stdcall))
-    CC_X86FastCall, // __attribute__((fastcall))
-    CC_X86ThisCall, // __attribute__((thiscall))
+    CC_C,             // __attribute__((cdecl))
+    CC_X86StdCall,    // __attribute__((stdcall))
+    CC_X86FastCall,   // __attribute__((fastcall))
+    CC_X86ThisCall,   // __attribute__((thiscall))
      CC_X86VectorCall, // __attribute__((vectorcall))
-    CC_X86Pascal,   // __attribute__((pascal))
-    CC_Win64,       // __attribute__((ms_abi))
-    CC_X86_64SysV,  // __attribute__((sysv_abi))
-    CC_X86RegCall, // __attribute__((regcall))
-    CC_AAPCS,       // __attribute__((pcs("aapcs")))
-    CC_AAPCS_VFP,   // __attribute__((pcs("aapcs-vfp")))
-    CC_IntelOclBicc, // __attribute__((intel_ocl_bicc))
-    CC_SpirFunction, // default for OpenCL functions on SPIR target
-    CC_OpenCLKernel, // inferred for OpenCL kernels
-    CC_Swift,        // __attribute__((swiftcall))
-    CC_PreserveMost, // __attribute__((preserve_most))
-    CC_PreserveAll,  // __attribute__((preserve_all))
+    CC_X86Pascal,     // __attribute__((pascal))
+    CC_Win64,         // __attribute__((ms_abi))
+    CC_X86_64SysV,    // __attribute__((sysv_abi))
+    CC_X86RegCall,    // __attribute__((regcall))
+    CC_AAPCS,         // __attribute__((pcs("aapcs")))
+    CC_AAPCS_VFP,     // __attribute__((pcs("aapcs-vfp")))
+    CC_IntelOclBicc,  // __attribute__((intel_ocl_bicc))
+    CC_SpirFunction,  // default for OpenCL functions on SPIR target
+    CC_OpenCLKernel,  // inferred for OpenCL kernels
+    CC_Swift,         // __attribute__((swiftcall))
+    CC_PreserveMost,  // __attribute__((preserve_most))
+    CC_PreserveAll,   // __attribute__((preserve_all))
+    CC_CUDAKernel,    // inferred for CUDA kernels
    };
  
    /// \brief Checks whether the given calling convention supports variadic
diff --git a/lib/AST/ItaniumMangle.cpp b/lib/AST/ItaniumMangle.cpp

index d42d705d09064270df5466a0cdb2abba37d2ad7c..0f22bf275cbacb33885b55c9040fe075423de00b 100644 (file)
--- a/lib/AST/ItaniumMangle.cpp
+++ b/lib/AST/ItaniumMangle.cpp
@@ -2628,6 +2628,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
    case CC_OpenCLKernel:
    case CC_PreserveMost:
    case CC_PreserveAll:
+  case CC_CUDAKernel:
      // FIXME: we should be mangling all of the above.
      return "";
  
diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp

index d8b9967cdc86d4cff835b98ac963a42b674e03aa..021699f3be81c4d8b96021072761c5b2ce02f2db 100644 (file)
--- a/lib/AST/Type.cpp
+++ b/lib/AST/Type.cpp
@@ -2752,6 +2752,7 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
    case CC_Swift: return "swiftcall";
    case CC_PreserveMost: return "preserve_most";
    case CC_PreserveAll: return "preserve_all";
+  case CC_CUDAKernel: return "cuda_kernel";
    }
  
    llvm_unreachable("Invalid calling convention.");
diff --git a/lib/AST/TypePrinter.cpp b/lib/AST/TypePrinter.cpp

index d46f8d3348e0c1a809f5ad1277b0468b93646be9..fe67f4e3b27595c8e14da06ba6e197b5248db042 100644 (file)
--- a/lib/AST/TypePrinter.cpp
+++ b/lib/AST/TypePrinter.cpp
@@ -780,6 +780,10 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
      case CC_OpenCLKernel:
        // Do nothing. These CCs are not available as attributes.
        break;
+    case CC_CUDAKernel:
+      // ToDo: print this before the function.
+      OS << " __global__";
+      break;
      case CC_Swift:
        OS << " __attribute__((swiftcall))";
        break;
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp

index 01caadba92db50658ba2b81965599e64ba3555e2..148a12eac84b9ab0f41bdaa295266d2a399fdbd7 100644 (file)
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -64,6 +64,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
    case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
    case CC_PreserveAll: return llvm::CallingConv::PreserveAll;
    case CC_Swift: return llvm::CallingConv::Swift;
+  case CC_CUDAKernel: return CGM.getTargetCodeGenInfo().getCUDAKernelCallingConv();
    }
  }
  
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp

index 77d832cc45b68ee2a469088e562445011161f686..42c0210e1df4547c8919cf75c8429ec5d8fc654f 100644 (file)
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -1022,6 +1022,9 @@ static unsigned getDwarfCC(CallingConv CC) {
      return llvm::dwarf::DW_CC_LLVM_PreserveAll;
    case CC_X86RegCall:
      return llvm::dwarf::DW_CC_LLVM_X86RegCall;
+  case CC_CUDAKernel:
+    // ToDo: Add llvm::dwarf::DW_CC_LLVM_CUDAKernel;
+    return 0;
    }
    return 0;
  }
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp

index b6f265a3579f80cb4ee871cc923e7b3b1fb2035c..02dd21483b45befa635e2a0c1ec59d4dcb96643a 100644 (file)
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -431,6 +431,10 @@ unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
    return llvm::CallingConv::SPIR_KERNEL;
  }
  
+unsigned TargetCodeGenInfo::getCUDAKernelCallingConv() const {
+  return llvm::CallingConv::C;
+}
+
  llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
      llvm::PointerType *T, QualType QT) const {
    return llvm::ConstantPointerNull::get(T);
@@ -7635,6 +7639,7 @@ public:
    void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                             CodeGen::CodeGenModule &M) const override;
    unsigned getOpenCLKernelCallingConv() const override;
+  unsigned getCUDAKernelCallingConv() const override;
  
    llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
        llvm::PointerType *T, QualType QT) const override;
@@ -7722,6 +7727,10 @@ unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
    return llvm::CallingConv::AMDGPU_KERNEL;
  }
  
+unsigned AMDGPUTargetCodeGenInfo::getCUDAKernelCallingConv() const {
+  return llvm::CallingConv::AMDGPU_KERNEL;
+}
+
  // Currently LLVM assumes null pointers always have value 0,
  // which results in incorrectly transformed IR. Therefore, instead of
  // emitting null pointers in private and local address spaces, a null
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h

index 533e14514ff4185f2640d131f556be6b8ce70e99..ab463c5dc6ec1e745677bab35b62b0cffe627166 100644 (file)
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -223,6 +223,9 @@ public:
    /// Get LLVM calling convention for OpenCL kernel.
    virtual unsigned getOpenCLKernelCallingConv() const;
  
+  /// Get LLVM calling convention for CUDA kernel.
+  virtual unsigned getCUDAKernelCallingConv() const;
+
    /// Get target specific null pointer.
    /// \param T is the LLVM type of the null pointer.
    /// \param QT is the clang QualType of the null pointer.
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp

index c0533f7cc7dbb58cf1901b42ca489288d36a191d..d87c21c0b177764fa9fcff0ada4f5222de3c3ef1 100644 (file)
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -25,6 +25,7 @@
  #include "clang/AST/ExprObjC.h"
  #include "clang/AST/ExprOpenMP.h"
  #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Type.h"
  #include "clang/AST/TypeLoc.h"
  #include "clang/Basic/PartialDiagnostic.h"
  #include "clang/Basic/SourceManager.h"
@@ -1657,6 +1658,16 @@ Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
        isa<VarDecl>(D) &&
        NeedToCaptureVariable(cast<VarDecl>(D), NameInfo.getLoc());
  
+  // Drop CUDA kernel calling convention since it is invisible to the user
+  // in DRE.
+  if (const auto *FT = Ty->getAs<FunctionType>()) {
+    if (FT->getCallConv() == CC_CUDAKernel) {
+      FT = Context.adjustFunctionType(FT,
+                                      FT->getExtInfo().withCallingConv(CC_C));
+      Ty = QualType(FT, Ty.getQualifiers().getAsOpaqueValue());
+    }
+  }
+
    DeclRefExpr *E;
    if (isa<VarTemplateSpecializationDecl>(D)) {
      VarTemplateSpecializationDecl *VarSpec =
diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp

index dea1c2efe9dff9703ced2f51550f56a03a7db2ea..e917bcc1cc2c3a058d6bbd0e0bf210b92c3d9114 100644 (file)
--- a/lib/Sema/SemaOverload.cpp
+++ b/lib/Sema/SemaOverload.cpp
@@ -1481,7 +1481,6 @@ bool Sema::IsFunctionConversion(QualType FromType, QualType ToType,
                   .getTypePtr());
        Changed = true;
      }
-
      // Convert FromFPT's ExtParameterInfo if necessary. The conversion is valid
      // only if the ExtParameterInfo lists of the two function prototypes can be
      // merged and the merged list is identical to ToFPT's ExtParameterInfo list.
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp

index 00bb21ff307fafb7a1c171845e5e8204bc5e386a..7bcc5b66fb728186a0b9d566ccc94e6e7c1bb4bb 100644 (file)
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -3316,6 +3316,18 @@ getCCForDeclaratorChunk(Sema &S, Declarator &D,
    CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic,
                                                           IsCXXInstanceMethod);
  
+  // Attribute AT_CUDAGlobal affects the calling convention for AMDGPU targets.
+  // This is the simplest place to infer calling convention for CUDA kernels.
+  if (S.getLangOpts().CUDA && S.getLangOpts().CUDAIsDevice) {
+    for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList();
+         Attr; Attr = Attr->getNext()) {
+      if (Attr->getKind() == AttributeList::AT_CUDAGlobal) {
+        CC = CC_CUDAKernel;
+        break;
+      }
+    }
+  }
+
    // Attribute AT_OpenCLKernel affects the calling convention for SPIR
    // and AMDGPU targets, hence it cannot be treated as a calling
    // convention attribute. This is the simplest place to infer
diff --git a/test/CodeGenCUDA/kernel-amdgcn.cu b/test/CodeGenCUDA/kernel-amdgcn.cu

new file mode 100644 (file)

index 0000000..a7369f9
--- /dev/null
+++ b/test/CodeGenCUDA/kernel-amdgcn.cu
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+
+// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
+class A {
+public:
+  static __global__ void kernel(){}
+};
+
+// CHECK: define void @_Z10non_kernelv
+__device__ void non_kernel(){}
+
+// CHECK: define amdgpu_kernel void @_Z6kerneli
+__global__ void kernel(int x) {
+  non_kernel();
+}
+
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
+template<class T>
+__global__ void template_kernel(T x) {}
+
+void launch(void *f);
+
+int main() {
+  launch((void*)A::kernel);
+  launch((void*)kernel);
+  launch((void*)template_kernel<A>);
+  return 0;
+}
diff --git a/tools/libclang/CXType.cpp b/tools/libclang/CXType.cpp

index dfc0152477685f949963a80edae14b8452919f18..25bbde0c8a29d797b85edd8ad0daa4a12e7c057e 100644 (file)
--- a/tools/libclang/CXType.cpp
+++ b/tools/libclang/CXType.cpp
@@ -626,6 +626,7 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) {
        TCALLINGCONV(PreserveAll);
      case CC_SpirFunction: return CXCallingConv_Unexposed;
      case CC_OpenCLKernel: return CXCallingConv_Unexposed;
+    case CC_CUDAKernel: return CXCallingConv_Unexposed;
        break;
      }
  #undef TCALLINGCONV
author	Yaxun Liu <Yaxun.Liu@amd.com>
	Thu, 29 Mar 2018 15:02:08 +0000 (15:02 +0000)
committer	Yaxun Liu <Yaxun.Liu@amd.com>
	Thu, 29 Mar 2018 15:02:08 +0000 (15:02 +0000)
include/clang/Basic/Specifiers.h		patch \| blob \| history
lib/AST/ItaniumMangle.cpp		patch \| blob \| history
lib/AST/Type.cpp		patch \| blob \| history
lib/AST/TypePrinter.cpp		patch \| blob \| history
lib/CodeGen/CGCall.cpp		patch \| blob \| history
lib/CodeGen/CGDebugInfo.cpp		patch \| blob \| history
lib/CodeGen/TargetInfo.cpp		patch \| blob \| history
lib/CodeGen/TargetInfo.h		patch \| blob \| history
lib/Sema/SemaExpr.cpp		patch \| blob \| history
lib/Sema/SemaOverload.cpp		patch \| blob \| history
lib/Sema/SemaType.cpp		patch \| blob \| history
test/CodeGenCUDA/kernel-amdgcn.cu	[new file with mode: 0644]	patch \| blob
tools/libclang/CXType.cpp		patch \| blob \| history