/// \brief CallingConv - Specifies the calling convention that a function uses.
enum CallingConv {
- CC_C, // __attribute__((cdecl))
- CC_X86StdCall, // __attribute__((stdcall))
- CC_X86FastCall, // __attribute__((fastcall))
- CC_X86ThisCall, // __attribute__((thiscall))
+ CC_C, // __attribute__((cdecl))
+ CC_X86StdCall, // __attribute__((stdcall))
+ CC_X86FastCall, // __attribute__((fastcall))
+ CC_X86ThisCall, // __attribute__((thiscall))
CC_X86VectorCall, // __attribute__((vectorcall))
- CC_X86Pascal, // __attribute__((pascal))
- CC_Win64, // __attribute__((ms_abi))
- CC_X86_64SysV, // __attribute__((sysv_abi))
- CC_X86RegCall, // __attribute__((regcall))
- CC_AAPCS, // __attribute__((pcs("aapcs")))
- CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp")))
- CC_IntelOclBicc, // __attribute__((intel_ocl_bicc))
- CC_SpirFunction, // default for OpenCL functions on SPIR target
- CC_OpenCLKernel, // inferred for OpenCL kernels
- CC_Swift, // __attribute__((swiftcall))
- CC_PreserveMost, // __attribute__((preserve_most))
- CC_PreserveAll, // __attribute__((preserve_all))
+ CC_X86Pascal, // __attribute__((pascal))
+ CC_Win64, // __attribute__((ms_abi))
+ CC_X86_64SysV, // __attribute__((sysv_abi))
+ CC_X86RegCall, // __attribute__((regcall))
+ CC_AAPCS, // __attribute__((pcs("aapcs")))
+ CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp")))
+ CC_IntelOclBicc, // __attribute__((intel_ocl_bicc))
+ CC_SpirFunction, // default for OpenCL functions on SPIR target
+ CC_OpenCLKernel, // inferred for OpenCL kernels
+ CC_Swift, // __attribute__((swiftcall))
+ CC_PreserveMost, // __attribute__((preserve_most))
+ CC_PreserveAll, // __attribute__((preserve_all))
+ CC_CUDAKernel, // inferred for CUDA kernels
};
/// \brief Checks whether the given calling convention supports variadic
case CC_OpenCLKernel:
case CC_PreserveMost:
case CC_PreserveAll:
+ case CC_CUDAKernel:
// FIXME: we should be mangling all of the above.
return "";
case CC_Swift: return "swiftcall";
case CC_PreserveMost: return "preserve_most";
case CC_PreserveAll: return "preserve_all";
+ case CC_CUDAKernel: return "cuda_kernel";
}
llvm_unreachable("Invalid calling convention.");
case CC_OpenCLKernel:
// Do nothing. These CCs are not available as attributes.
break;
+ case CC_CUDAKernel:
+ // TODO: __global__ should be printed before the function, not after it.
+ OS << " __global__";
+ break;
case CC_Swift:
OS << " __attribute__((swiftcall))";
break;
case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
case CC_PreserveAll: return llvm::CallingConv::PreserveAll;
case CC_Swift: return llvm::CallingConv::Swift;
+ case CC_CUDAKernel: return CGM.getTargetCodeGenInfo().getCUDAKernelCallingConv();
}
}
return llvm::dwarf::DW_CC_LLVM_PreserveAll;
case CC_X86RegCall:
return llvm::dwarf::DW_CC_LLVM_X86RegCall;
+ case CC_CUDAKernel:
+ // TODO: Add llvm::dwarf::DW_CC_LLVM_CUDAKernel.
+ return 0;
}
return 0;
}
return llvm::CallingConv::SPIR_KERNEL;
}
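+// By default CUDA kernels use the C calling convention; targets that need a
+// dedicated kernel convention (e.g. AMDGPU) override this.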
+unsigned TargetCodeGenInfo::getCUDAKernelCallingConv() const {
+ return llvm::CallingConv::C;
+}
+
llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T, QualType QT) const {
return llvm::ConstantPointerNull::get(T);
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
+ unsigned getCUDAKernelCallingConv() const override;
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T, QualType QT) const override;
return llvm::CallingConv::AMDGPU_KERNEL;
}
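+// On AMDGPU, CUDA kernels use the same amdgpu_kernel calling convention as
+// OpenCL kernels.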
+unsigned AMDGPUTargetCodeGenInfo::getCUDAKernelCallingConv() const {
+ return llvm::CallingConv::AMDGPU_KERNEL;
+}
+
// Currently LLVM assumes null pointers always have value 0,
// which results in incorrectly transformed IR. Therefore, instead of
// emitting null pointers in private and local address spaces, a null
/// Get LLVM calling convention for OpenCL kernel.
virtual unsigned getOpenCLKernelCallingConv() const;
+ /// Get LLVM calling convention for CUDA kernel.
+ virtual unsigned getCUDAKernelCallingConv() const;
+
/// Get target specific null pointer.
/// \param T is the LLVM type of the null pointer.
/// \param QT is the clang QualType of the null pointer.
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ExprOpenMP.h"
#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/SourceManager.h"
isa<VarDecl>(D) &&
NeedToCaptureVariable(cast<VarDecl>(D), NameInfo.getLoc());
+ // Drop the CUDA kernel calling convention, since it is not visible to the
+ // user through the type of the DeclRefExpr.
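+ // For example, when the address of '__global__ void kernel(int)' is taken,
+ // the DeclRefExpr should have type 'void (int)' with the default calling
+ // convention.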
+ if (const auto *FT = Ty->getAs<FunctionType>()) {
+ if (FT->getCallConv() == CC_CUDAKernel) {
+ FT = Context.adjustFunctionType(FT,
+ FT->getExtInfo().withCallingConv(CC_C));
+ Ty = QualType(FT, Ty.getQualifiers().getAsOpaqueValue());
+ }
+ }
+
DeclRefExpr *E;
if (isa<VarTemplateSpecializationDecl>(D)) {
VarTemplateSpecializationDecl *VarSpec =
.getTypePtr());
Changed = true;
}
-
// Convert FromFPT's ExtParameterInfo if necessary. The conversion is valid
// only if the ExtParameterInfo lists of the two function prototypes can be
// merged and the merged list is identical to ToFPT's ExtParameterInfo list.
CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic,
IsCXXInstanceMethod);
+ // Attribute AT_CUDAGlobal affects the calling convention for AMDGPU targets.
+ // This is the simplest place to infer calling convention for CUDA kernels.
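+ // For example, when compiling for the device, '__global__ void kernel(int)'
+ // is given CC_CUDAKernel here.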
+ if (S.getLangOpts().CUDA && S.getLangOpts().CUDAIsDevice) {
+ for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList();
+ Attr; Attr = Attr->getNext()) {
+ if (Attr->getKind() == AttributeList::AT_CUDAGlobal) {
+ CC = CC_CUDAKernel;
+ break;
+ }
+ }
+ }
+
// Attribute AT_OpenCLKernel affects the calling convention for SPIR
// and AMDGPU targets, hence it cannot be treated as a calling
// convention attribute. This is the simplest place to infer
--- /dev/null
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
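+// Check that __global__ functions get the amdgpu_kernel calling convention on
+// the AMDGPU device, and that their addresses can still be taken with the
+// default function type.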
+#include "Inputs/cuda.h"
+
+// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
+class A {
+public:
+ static __global__ void kernel(){}
+};
+
+// CHECK: define void @_Z10non_kernelv
+__device__ void non_kernel(){}
+
+// CHECK: define amdgpu_kernel void @_Z6kerneli
+__global__ void kernel(int x) {
+ non_kernel();
+}
+
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
+template<class T>
+__global__ void template_kernel(T x) {}
+
+void launch(void *f);
+
+int main() {
+ launch((void*)A::kernel);
+ launch((void*)kernel);
+ launch((void*)template_kernel<A>);
+ return 0;
+}
TCALLINGCONV(PreserveAll);
case CC_SpirFunction: return CXCallingConv_Unexposed;
case CC_OpenCLKernel: return CXCallingConv_Unexposed;
+ case CC_CUDAKernel: return CXCallingConv_Unexposed;
break;
}
#undef TCALLINGCONV